{"id":"https://openalex.org/W4393719289","doi":"https://doi.org/10.48550/arxiv.2403.19898","title":"Structure Matters: Tackling the Semantic Discrepancy in Diffusion Models\n for Image Inpainting","display_name":"Structure Matters: Tackling the Semantic Discrepancy in Diffusion Models\n for Image Inpainting","publication_year":2024,"publication_date":"2024-03-28","ids":{"openalex":"https://openalex.org/W4393719289","doi":"https://doi.org/10.48550/arxiv.2403.19898"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2403.19898","pdf_url":"https://arxiv.org/pdf/2403.19898","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2403.19898","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100606627","display_name":"Haipeng Liu","orcid":"https://orcid.org/0000-0003-3234-963X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Haipeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100714578","display_name":"Yang Wang","orcid":"https://orcid.org/0000-0002-6815-0879"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101258960","display_name":"Biao Qian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Biao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100377147","display_name":"Meng Wang","orcid":"https://orcid.org/0000-0002-3094-7735"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Meng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100728762","display_name":"Yong Rui","orcid":"https://orcid.org/0000-0002-9142-5914"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rui, Yong","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":{"min":0,"max":77},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9277,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9277,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inpainting","display_name":"Inpainting","score":0.9219996}],"concepts":[{"id":"https://openalex.org/C11727466","wikidata":"https://www.wikidata.org/wiki/Q1628157","display_name":"Inpainting","level":3,"score":0.9219996},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.60357225},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.48154527},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45405748},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.39300117},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3390997},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.090621114},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.056814015}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2403.19898","pdf_url":"https://arxiv.org/pdf/2403.19898","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2403.19898","pdf_url":"https://arxiv.org/pdf/2403.19898","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W49967185","https://openalex.org/W425542480","https://openalex.org/W3178025616","https://openalex.org/W3035059915","https://openalex.org/W2946160871","https://openalex.org/W2135359786","https://openalex.org/W2131831293","https://openalex.org/W2060947339","https://openalex.org/W2017457812","https://openalex.org/W1995073329"],"abstract_inverted_index":{"Denoising":[0],"diffusion":[1,75,121],"probabilistic":[2],"models":[3],"for":[4,141,181],"image":[5,16,142],"inpainting":[6],"aim":[7,88],"to":[8,12,61,70,78,89,100,105,125,135,153,205,240],"add":[9],"the":[10,13,18,29,32,37,41,46,55,66,71,79,102,107,127,147,170,177,182,188,192,196,207,213,216,242,254,259],"noise":[11,73],"texture":[14,30,59,95,129,162,183,243],"of":[15,28,191,215,256],"during":[17],"forward":[19],"process":[20,131],"and":[21,51,109,221],"recover":[22],"masked":[23,50,67,220],"regions":[24,68,174],"with":[25,98],"unmasked":[26,52,58,92,173,222],"ones":[27],"via":[31],"reverse":[33],"denoising":[34,96,130,139,184,197,209,244],"process.":[35],"Despite":[36],"meaningful":[38,110],"semantics":[39,93,111,165,171],"generation,":[40],"existing":[42],"arts":[43],"suffer":[44],"from":[45,172,187],"semantic":[47,103,155,249],"discrepancy":[48,81,156],"between":[49,82,219],"regions,":[53],"since":[54],"semantically":[56,148],"dense":[57,161],"fails":[60],"be":[62],"completely":[63],"degraded":[64],"while":[65,144,160,246],"turn":[69],"pure":[72],"in":[74,157,166],"process,":[76,185,198,245],"leading":[77],"large":[80],"them.":[83],"In":[84],"this":[85,114],"paper,":[86],"we":[87,116,225],"answer":[90],"how":[91,99],"guide":[94,241],"process;together":[97],"tackle":[101,154],"discrepancy,":[104],"facilitate":[106],"consistent":[108],"generation.":[112],"To":[113],"end,":[115],"propose":[117],"a":[118,137,199,232],"novel":[119],"structure-guided":[120,200],"model":[122],"named":[123],"StrDiffusion,":[124],"reformulate":[126],"conventional":[128],"under":[132],"structure":[133,150,179,193,218,237],"guidance":[134,180],"derive":[136],"simplified":[138,208],"objective":[140,210],"inpainting,":[143],"revealing:":[145],"1)":[146],"sparse":[149],"is":[151,203,238,263],"beneficial":[152],"early":[158],"stage,":[159],"generates":[163],"reasonable":[164],"late":[167],"stage;":[168],"2)":[169],"essentially":[175],"offer":[176],"time-dependent":[178,189],"benefiting":[186],"sparsity":[190],"semantics.":[194],"For":[195],"neural":[201],"network":[202],"trained":[204],"estimate":[206],"by":[211],"exploiting":[212],"consistency":[214],"denoised":[217],"regions.":[223],"Besides,":[224],"devise":[226],"an":[227],"adaptive":[228],"resampling":[229],"strategy":[230],"as":[231,235],"formal":[233],"criterion":[234],"whether":[236],"competent":[239],"regulate":[247],"their":[248],"correlations.":[250],"Extensive":[251],"experiments":[252],"validate":[253],"merits":[255],"StrDiffusion":[257],"over":[258],"state-of-the-arts.":[260],"Our":[261],"code":[262],"available":[264],"at":[265],"https://github.com/htyjers/StrDiffusion.":[266]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4393719289","counts_by_year":[],"updated_date":"2025-04-21T11:01:56.777473","created_date":"2024-04-03"}