{"id":"https://openalex.org/W4395021849","doi":"https://doi.org/10.48550/arxiv.2404.12404","title":"Group-wise Prompting for Synthetic Tabular Data Generation using Large\n Language Models","display_name":"Group-wise Prompting for Synthetic Tabular Data Generation using Large\n Language Models","publication_year":2024,"publication_date":"2024-04-15","ids":{"openalex":"https://openalex.org/W4395021849","doi":"https://doi.org/10.48550/arxiv.2404.12404"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.12404","pdf_url":"http://arxiv.org/pdf/2404.12404","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2404.12404","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100376579","display_name":"Jin-Hee Kim","orcid":"https://orcid.org/0000-0001-6061-5363"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Jinhee","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100643810","display_name":"Tae-Sung Kim","orcid":"https://orcid.org/0000-0002-6260-4972"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Taesung","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5047912015","display_name":"Jaegul Choo","orcid":"https://orcid.org/0000-0003-1071-4835"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choo, Jaegul","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.901841,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":77,"max":88},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9972,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9972,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9901,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9442,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C2781311116","wikidata":"https://www.wikidata.org/wiki/Q83306","display_name":"Group (periodic table)","level":2,"score":0.57358557},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.55465657},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5370013},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42016482},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3233323},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.08919844},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.070080966},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.12404","pdf_url":"http://arxiv.org/pdf/2404.12404","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.12404","pdf_url":"http://arxiv.org/pdf/2404.12404","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4395014643","https://openalex.org/W4391913857","https://openalex.org/W4391375266","https://openalex.org/W3204019825","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2382290278","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Generating":[0],"realistic":[1],"synthetic":[2,28,91],"tabular":[3,144],"data":[4,32,55,145],"presents":[5],"a":[6,15,37],"critical":[7],"challenge":[8],"in":[9,42,109,140],"machine":[10,136],"learning.":[11],"This":[12,126],"study":[13],"introduces":[14],"simple":[16],"yet":[17],"effective":[18],"method":[19,41,97],"employing":[20],"Large":[21],"Language":[22],"Models":[23],"(LLMs)":[24],"tailored":[25],"to":[26,53,59,130],"generate":[27],"data,":[29],"specifically":[30],"addressing":[31,131],"imbalance":[33],"problems.":[34],"We":[35],"propose":[36],"novel":[38],"group-wise":[39],"prompting":[40],"CSV-style":[43],"formatting":[44],"that":[45,56],"leverages":[46],"the":[47,60,66,78,85,90,132,141],"in-context":[48],"learning":[49,137],"capabilities":[50],"of":[51,65,80,89,95,135,143],"LLMs":[52],"produce":[54],"closely":[57],"adheres":[58],"specified":[61],"requirements":[62],"and":[63,87,112,119,147],"characteristics":[64],"target":[67],"dataset.":[68],"Moreover,":[69],"our":[70,96,155],"proposed":[71],"random":[72],"word":[73],"replacement":[74],"strategy":[75],"significantly":[76,128],"improves":[77],"handling":[79,148],"monotonous":[81],"categorical":[82],"values,":[83],"enhancing":[84],"accuracy":[86],"representativeness":[88],"data.":[92],"The":[93,151],"effectiveness":[94],"is":[98,157],"extensively":[99],"validated":[100],"across":[101],"eight":[102],"real-world":[103],"public":[104],"datasets,":[105],"achieving":[106],"state-of-the-art":[107],"performance":[108],"downstream":[110],"classification":[111],"regression":[113],"tasks":[114],"while":[115],"maintaining":[116],"inter-feature":[117],"correlations":[118],"improving":[120],"token":[121],"efficiency":[122],"over":[123],"existing":[124],"approaches.":[125],"advancement":[127],"contributes":[129],"key":[133],"challenges":[134],"applications,":[138],"particularly":[139],"context":[142],"generation":[146],"class":[149],"imbalance.":[150],"source":[152],"code":[153],"for":[154],"work":[156],"available":[158],"at:":[159],"https://github.com/seharanul17/synthetic-tabular-LLM":[160]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4395021849","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-04-16T16:19:23.370268","created_date":"2024-04-23"}