{"id":"https://openalex.org/W4393111402","doi":"https://doi.org/10.48550/arxiv.2403.13863","title":"DiffImpute: Tabular Data Imputation With Denoising Diffusion\n Probabilistic Model","display_name":"DiffImpute: Tabular Data Imputation With Denoising Diffusion\n Probabilistic Model","publication_year":2024,"publication_date":"2024-03-20","ids":{"openalex":"https://openalex.org/W4393111402","doi":"https://doi.org/10.48550/arxiv.2403.13863"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2403.13863","pdf_url":"https://arxiv.org/pdf/2403.13863","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2403.13863","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111363143","display_name":"Yizhu Wen","orcid":"https://orcid.org/0009-0008-0479-4991"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wen, Yizhu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100765686","display_name":"Kai Yi","orcid":"https://orcid.org/0000-0003-0415-3584"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yi, Kai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100622704","display_name":"Jing Ke","orcid":"https://orcid.org/0000-0001-7459-257X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ke, Jing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5072622467","display_name":"Yiqing Shen","orcid":"https://orcid.org/0000-0001-7866-3339"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Yiqing","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11901","display_name":"Bayesian Methods and Mixture Models","score":0.1076,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11901","display_name":"Bayesian Methods and Mixture Models","score":0.1076,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/imputation","display_name":"Imputation (statistics)","score":0.6909162}],"concepts":[{"id":"https://openalex.org/C58041806","wikidata":"https://www.wikidata.org/wiki/Q1660484","display_name":"Imputation (statistics)","level":3,"score":0.6909162},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.63958085},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5288618},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.45735896},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3977986},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34392852},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.16877949},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.09986454}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2403.13863","pdf_url":"https://arxiv.org/pdf/2403.13863","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2403.13863","pdf_url":"https://arxiv.org/pdf/2403.13863","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4211215373","https://openalex.org/W3217094455","https://openalex.org/W3123325766","https://openalex.org/W3119637569","https://openalex.org/W2989589450","https://openalex.org/W2898623405","https://openalex.org/W2791189374","https://openalex.org/W2405773734","https://openalex.org/W2374234271","https://openalex.org/W2058928557"],"abstract_inverted_index":{"Tabular":[0],"data":[1,129,133],"plays":[2],"a":[3,46,154,210,215],"crucial":[4],"role":[5],"in":[6,35,104],"various":[7,85],"domains":[8],"but":[9],"often":[10],"suffers":[11],"from":[12],"missing":[13,69],"values,":[14],"thereby":[15],"curtailing":[16],"its":[17,188],"potential":[18],"utility.":[19],"Traditional":[20],"imputation":[21,150],"techniques":[22],"frequently":[23],"yield":[24],"suboptimal":[25],"results":[26],"and":[27,93,116,127,135,196,214],"impose":[28],"substantial":[29],"computational":[30],"burdens,":[31],"leading":[32],"to":[33,84,122],"inaccuracies":[34],"subsequent":[36],"modeling":[37],"tasks.":[38],"To":[39,98,144],"address":[40],"these":[41],"challenges,":[42],"we":[43,106,152],"propose":[44,120,153],"DiffImpute,":[45],"novel":[47],"Denoising":[48],"Diffusion":[49],"Probabilistic":[50],"Model":[51],"(DDPM).":[52],"Specifically,":[53,175],"DiffImpute":[54],"is":[55,222],"trained":[56],"on":[57,166],"complete":[58],"tabular":[59,102,109],"datasets,":[60],"ensuring":[61],"that":[62,159],"it":[63,80,185],"can":[64,81],"produce":[65],"credible":[66],"imputations":[67],"for":[68],"entries":[70],"without":[71],"undermining":[72],"the":[73,76,101,132,141,171,179,182,197,204],"authenticity":[74],"of":[75,87,173,194,212,218],"existing":[77],"data.":[78],"Innovatively,":[79],"be":[82],"applied":[83],"settings":[86],"Missing":[88,94],"Completely":[89],"At":[90,95],"Random":[91,96],"(MCAR)":[92],"(MAR).":[97],"effectively":[99],"handle":[100],"features":[103],"DDPM,":[105],"tailor":[107],"four":[108],"denoising":[110,136,183],"networks,":[111],"spanning":[112],"MLP,":[113],"ResNet,":[114],"Transformer,":[115],"U-Net.":[117],"We":[118],"also":[119],"Harmonization":[121],"enhance":[123],"coherence":[124],"between":[125],"observed":[126],"imputed":[128],"by":[130],"infusing":[131],"back":[134],"them":[137],"multiple":[138],"times":[139],"during":[140],"sampling":[142,157],"stage.":[143],"enable":[145],"efficient":[146],"inference":[147],"while":[148],"maintaining":[149],"performance,":[151],"refined":[155],"non-Markovian":[156],"process":[158],"works":[160],"along":[161],"with":[162,178,209],"Harmonization.":[163],"Empirical":[164],"evaluations":[165],"seven":[167],"diverse":[168],"datasets":[169],"underscore":[170],"prowess":[172],"DiffImpute.":[174],"when":[176],"paired":[177],"Transformer":[180],"as":[181],"network,":[184],"consistently":[186],"outperforms":[187],"competitors,":[189],"boasting":[190],"an":[191],"average":[192],"ranking":[193,211],"1.7":[195],"most":[198],"minimal":[199],"standard":[200,216],"deviation.":[201],"In":[202],"contrast,":[203],"next":[205],"best":[206],"method":[207],"lags":[208],"2.8":[213],"deviation":[217],"0.9.":[219],"The":[220],"code":[221],"available":[223],"at":[224],"https://github.com/Dendiiiii/DiffImpute.":[225]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4393111402","counts_by_year":[],"updated_date":"2024-12-16T06:08:23.595556","created_date":"2024-03-24"}