{"id":"https://openalex.org/W4368754606","doi":"https://doi.org/10.48550/arxiv.2305.01598","title":"From Words to Code: Harnessing Data for Program Synthesis from Natural Language","display_name":"From Words to Code: Harnessing Data for Program Synthesis from Natural Language","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4368754606","doi":"https://doi.org/10.48550/arxiv.2305.01598"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.01598","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2305.01598","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017987283","display_name":"Anirudh Khatry","orcid":"https://orcid.org/0009-0004-7773-4405"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khatry, Anirudh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012221715","display_name":"Joyce Cahoon","orcid":"https://orcid.org/0000-0001-7217-4702"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cahoon, Joyce","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041047312","display_name":"Jordan Henkel","orcid":"https://orcid.org/0000-0003-3862-249X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Henkel, Jordan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036934715","display_name":"Shaleen Deep","orcid":"https://orcid.org/0000-0003-2342-4060"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deep, Shaleen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025875799","display_name":"Venkatesh Emani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Emani, Venkatesh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020678151","display_name":"Avrilia Floratou","orcid":"https://orcid.org/0009-0007-5760-8657"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Floratou, Avrilia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011543162","display_name":"Sumit Gulwani","orcid":"https://orcid.org/0000-0002-9226-9634"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gulwani, Sumit","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101652419","display_name":"Vu Le","orcid":"https://orcid.org/0000-0003-1625-4200"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Le, Vu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047934128","display_name":"Mohammad Raza","orcid":"https://orcid.org/0000-0001-7655-6222"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Raza, Mohammad","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066720336","display_name":"Sherry Shi","orcid":"https://orcid.org/0009-0001-3895-6687"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Sherry","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101442631","display_name":"Mukul Singh","orcid":"https://orcid.org/0000-0002-6709-5396"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Singh, Mukul","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5086308451","display_name":"Ashish Tiwari","orcid":"https://orcid.org/0000-0002-5153-2686"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tiwari, Ashish","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.925733,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":87,"max":90},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9928,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9863,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interleaving","display_name":"Interleaving","score":0.51200086},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.48195192}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8273121},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.64555097},{"id":"https://openalex.org/C510870499","wikidata":"https://www.wikidata.org/wiki/Q47607","display_name":"SQL","level":2,"score":0.61318296},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5635037},{"id":"https://openalex.org/C28034677","wikidata":"https://www.wikidata.org/wiki/Q17092530","display_name":"Interleaving","level":2,"score":0.51200086},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.49211568},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.48195192},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4506252},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44719929},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.329381},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.31304556},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.01598","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.01598","pdf_url":"http://arxiv.org/pdf/2305.01598","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2305.01598","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.01598","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2814468324","https://openalex.org/W2463883322","https://openalex.org/W2391789612","https://openalex.org/W2389236462","https://openalex.org/W2389051085","https://openalex.org/W2330343234","https://openalex.org/W2229382548","https://openalex.org/W2159231020","https://openalex.org/W1901012776","https://openalex.org/W1655266410"],"abstract_inverted_index":{"Creating":[0],"programs":[1,116,134,155],"to":[2,20,70,100,112,131,171,184,255],"correctly":[3],"manipulate":[4],"data":[5,46,81,95,111,227],"is":[6,69],"a":[7,84,129,238],"difficult":[8],"task,":[9,59],"as":[10],"the":[11,45,51,57,63,67,74,93,97,101,108,114,119,133,143,165,181,188],"underlying":[12],"programming":[13],"languages":[14],"and":[15,121,149,174,213,230,242,260],"APIs":[16],"can":[17],"be":[18,71],"challenging":[19],"learn":[21],"for":[22,37,187,195],"many":[23],"users":[24],"who":[25],"are":[26],"not":[27],"skilled":[28],"programmers.":[29],"Large":[30],"language":[31,53],"models":[32],"(LLMs)":[33],"demonstrate":[34],"remarkable":[35],"potential":[36],"generating":[38],"code":[39],"from":[40,50,92,167],"natural":[41,52],"language,":[42],"but":[43],"in":[44,83,221,257,262],"manipulation":[47],"domain,":[48],"apart":[49],"(NL)":[54],"description":[55],"of":[56,240,253],"intended":[58],"we":[60,106,204],"also":[61,199],"have":[62,79],"dataset":[64],"on":[65,139,237],"which":[66],"task":[68],"performed,":[72],"or":[73],"\"data":[75],"context\".":[76],"Existing":[77],"approaches":[78],"utilized":[80],"context":[82],"limited":[85],"way":[86],"by":[87,118,136,208],"simply":[88],"adding":[89],"relevant":[90],"information":[91],"input":[94,110],"into":[96],"prompts":[98],"sent":[99],"LLM.":[102],"In":[103],"this":[104],"work,":[105],"utilize":[107],"available":[109],"execute":[113],"candidate":[115],"generated":[117,135,207],"LLMs":[120,137,209],"gather":[122],"their":[123],"outputs.":[124],"We":[125,191,198,216,245],"introduce":[126,200],"semantic":[127,147,161,196],"reranking,":[128],"technique":[130],"rerank":[132],"based":[138,151],"three":[140,222],"signals":[141],"coming":[142],"program":[144],"outputs:":[145],"(a)":[146],"filtering":[148],"well-formedness":[150],"score":[152,177],"tuning:":[153,178],"do":[154,164,180],"even":[156],"generate":[157],"well-formed":[158],"outputs,":[159],"(b)":[160],"interleaving:":[162],"how":[163,179],"outputs":[166,182,185],"different":[168],"candidates":[169],"compare":[170,183],"each":[172],"other,":[173],"(c)":[175],"output-based":[176],"predicted":[186],"same":[189],"task.":[190],"provide":[192],"theoretical":[193],"justification":[194],"interleaving.":[197],"temperature":[201],"mixing,":[202],"where":[203],"combine":[205],"samples":[206],"using":[210],"both":[211],"high":[212],"low":[214],"temperatures.":[215],"extensively":[217],"evaluate":[218],"our":[219],"approach":[220],"domains,":[223,250],"namely":[224],"databases":[225],"(SQL),":[226],"science":[228],"(Pandas)":[229],"business":[231],"intelligence":[232],"(Excel's":[233],"Power":[234],"Query":[235],"M)":[236],"variety":[239],"new":[241],"existing":[243],"benchmarks.":[244],"observe":[246],"substantial":[247],"gains":[248],"across":[249],"with":[251],"improvements":[252],"up":[254],"45%":[256],"top-1":[258],"accuracy":[259],"34%":[261],"top-3":[263],"accuracy.":[264]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4368754606","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3}],"updated_date":"2025-01-21T00:42:58.202628","created_date":"2023-05-05"}