{"id":"https://openalex.org/W4391591578","doi":"https://doi.org/10.48550/arxiv.2402.01391","title":"StepCoder: Improve Code Generation with Reinforcement Learning from\n Compiler Feedback","display_name":"StepCoder: Improve Code Generation with Reinforcement Learning from\n Compiler Feedback","publication_year":2024,"publication_date":"2024-02-02","ids":{"openalex":"https://openalex.org/W4391591578","doi":"https://doi.org/10.48550/arxiv.2402.01391"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2402.01391","pdf_url":"https://arxiv.org/pdf/2402.01391","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2402.01391","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083173421","display_name":"Shihan Dou","orcid":"https://orcid.org/0009-0002-6013-3035"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dou, Shihan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100768487","display_name":"Yan Liu","orcid":"https://orcid.org/0000-0003-2649-2552"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102703851","display_name":"Haoxiang Jia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jia, Haoxiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060618975","display_name":"Limao Xiong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiong, Limao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087033501","display_name":"Enyu Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Enyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100775327","display_name":"Wei L. Shen","orcid":"https://orcid.org/0000-0002-3785-2245"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Wei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101125334","display_name":"Junjie Shan","orcid":"https://orcid.org/0009-0007-0430-1133"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shan, Junjie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016693603","display_name":"Caishuang Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Caishuang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100411431","display_name":"Xiao Wang","orcid":"https://orcid.org/0000-0001-6545-1943"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101588749","display_name":"Xiaoran Fan","orcid":"https://orcid.org/0000-0002-6368-9250"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Xiaoran","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036875405","display_name":"Zhiheng Xi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xi, Zhiheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053105889","display_name":"Yuhao Zhou","orcid":"https://orcid.org/0000-0001-8074-6416"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Yuhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101794864","display_name":"Tao Ji","orcid":"https://orcid.org/0000-0002-7671-5921"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ji, Tao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055879965","display_name":"Rui Zheng","orcid":"https://orcid.org/0000-0002-4225-7133"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Rui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014306843","display_name":"Qi Zhang","orcid":"https://orcid.org/0000-0002-5203-8778"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Qi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088834359","display_name":"Xuanjing Huang","orcid":"https://orcid.org/0000-0001-9197-9426"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Xuanjing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5058353652","display_name":"Tao Gui","orcid":"https://orcid.org/0000-0002-6154-0751"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gui, Tao","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.913014,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":77,"max":88},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9774,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9774,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9344,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10820","display_name":"Fuzzy Logic and Control Systems","score":0.9339,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5090072}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7084827},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.67965645},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.64900005},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5090072},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.46525982},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.27544612},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2402.01391","pdf_url":"https://arxiv.org/pdf/2402.01391","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2402.01391","pdf_url":"https://arxiv.org/pdf/2402.01391","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4379536100","https://openalex.org/W4240253816","https://openalex.org/W3096456556","https://openalex.org/W2748952813","https://openalex.org/W2245390655","https://openalex.org/W2171648479","https://openalex.org/W2132806979","https://openalex.org/W188282463","https://openalex.org/W1713081424","https://openalex.org/W1523769955"],"abstract_inverted_index":{"The":[0],"advancement":[1],"of":[2,12,29,90,110,149],"large":[3],"language":[4],"models":[5],"(LLMs)":[6],"has":[7],"significantly":[8],"propelled":[9],"the":[10,26,37,56,62,96,101,118,122,135,147,159,163],"field":[11],"code":[13,33,39,71,87,104,124],"generation.":[14],"Previous":[15],"work":[16],"integrated":[17],"reinforcement":[18],"learning":[19],"(RL)":[20],"with":[21],"compiler":[22],"feedback":[23],"for":[24,86,138],"exploring":[25],"output":[27,164],"space":[28,165],"LLMs":[30,42,66],"to":[31,45,126,145,161],"enhance":[32],"generation":[34,105],"quality.":[35],"However,":[36],"lengthy":[38],"generated":[40],"by":[41,67,99,120],"in":[43,170],"response":[44],"complex":[46],"human":[47],"requirements":[48],"makes":[49],"RL":[50,84,139],"exploration":[51,97],"a":[52,82,108],"challenge.":[53],"Also,":[54],"since":[55],"unit":[57,150],"tests":[58],"may":[59],"not":[60],"cover":[61],"complicated":[63],"code,":[64],"optimizing":[65],"using":[68],"these":[69,77],"unexecuted":[70,123],"snippets":[72],"is":[73,142],"ineffective.":[74],"To":[75],"tackle":[76],"challenges,":[78],"we":[79,132],"introduce":[80],"StepCoder,":[81],"novel":[83],"framework":[85],"generation,":[88],"consisting":[89],"two":[91],"main":[92],"components:":[93],"CCCS":[94],"addresses":[95],"challenge":[98],"breaking":[100],"long":[102],"sequences":[103],"task":[106],"into":[107],"Curriculum":[109],"Code":[111],"Completion":[112],"Subtasks,":[113],"while":[114],"FGO":[115],"only":[116],"optimizes":[117],"model":[119],"masking":[121],"segments":[125],"provide":[127],"Fine-Grained":[128],"Optimization.":[129],"In":[130],"addition,":[131],"furthermore":[133],"construct":[134],"APPS+":[136,175],"dataset":[137,174],"training,":[140],"which":[141],"manually":[143],"verified":[144],"ensure":[146],"correctness":[148],"tests.":[151],"Experimental":[152],"results":[153],"show":[154],"that":[155],"our":[156],"method":[157],"improves":[158],"ability":[160],"explore":[162],"and":[166,176],"outperforms":[167],"state-of-the-art":[168],"approaches":[169],"corresponding":[171],"benchmarks.":[172],"Our":[173],"StepCoder":[177],"are":[178],"available":[179],"online.":[180]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4391591578","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-04-20T02:22:43.111615","created_date":"2024-02-07"}