{"id":"https://openalex.org/W4392576563","doi":"https://doi.org/10.48550/arxiv.2403.03894","title":"IRCoder: Intermediate Representations Make Language Models Robust\n Multilingual Code Generators","display_name":"IRCoder: Intermediate Representations Make Language Models Robust\n Multilingual Code Generators","publication_year":2024,"publication_date":"2024-03-06","ids":{"openalex":"https://openalex.org/W4392576563","doi":"https://doi.org/10.48550/arxiv.2403.03894"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2403.03894","pdf_url":"https://arxiv.org/pdf/2403.03894","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2403.03894","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022743339","display_name":"Indraneil Paul","orcid":"https://orcid.org/0000-0001-8215-4764"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Paul, Indraneil","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101314389","display_name":"Jun Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Jun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079336821","display_name":"Goran Glava\u009a\u0161","orcid":"https://orcid.org/0000-0002-1301-6314"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Glava\u0161, Goran","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5027450194","display_name":"Iryna Gurevych","orcid":"https://orcid.org/0000-0003-2187-7621"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gurevych, Iryna","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":77},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.8919,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.8919,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.8896,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.8147,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6099435}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.70562303},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.61063826},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6099435},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.43738604},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42789578},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.34315705},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.07905936},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.055885345},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.045153886},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2403.03894","pdf_url":"https://arxiv.org/pdf/2403.03894","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2403.03894","pdf_url":"https://arxiv.org/pdf/2403.03894","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4292559016","https://openalex.org/W4231937131","https://openalex.org/W323219885","https://openalex.org/W2258184894","https://openalex.org/W2163672025","https://openalex.org/W2063928587","https://openalex.org/W2048831961","https://openalex.org/W1606349578","https://openalex.org/W1589342014","https://openalex.org/W1487966966"],"abstract_inverted_index":{"Code":[0],"understanding":[1],"and":[2,40,105,165,185,196,206],"generation":[3,194],"have":[4,70],"fast":[5],"become":[6],"some":[7],"of":[8,13,22,46,85,103,120,174,192],"the":[9,51,83,100,157,162,168],"most":[10,67],"popular":[11],"applications":[12],"language":[14,63,151,164],"models":[15],"(LMs).":[16],"Nonetheless,":[17],"research":[18],"on":[19,73,154],"multilingual":[20,101,201],"aspects":[21],"Code-LMs":[23,69,104,137,158],"(i.e.,":[24],"LMs":[25],"for":[26,60],"code":[27,75,125,193,202,204],"generation)":[28],"such":[29],"as":[30],"cross-lingual":[31,107],"transfer":[32],"between":[33],"different":[34],"programming":[35,95,176],"languages,":[36],"language-specific":[37],"data":[38,47],"augmentation,":[39],"post-hoc":[41],"LM":[42],"adaptation,":[43],"alongside":[44],"exploitation":[45],"sources":[48],"other":[49],"than":[50,59],"original":[52],"textual":[53],"content,":[54],"has":[55],"been":[56,71],"much":[57],"sparser":[58],"their":[61],"natural":[62],"counterparts.":[64],"In":[65,78],"particular,":[66],"mainstream":[68],"pre-trained":[72],"source":[74,124],"files":[76,126],"alone.":[77],"this":[79,110],"work,":[80],"we":[81,112,146],"investigate":[82],"prospect":[84],"leveraging":[86],"readily":[87],"available":[88],"compiler":[89],"intermediate":[90,130],"representations":[91],"-":[92,97],"shared":[93],"across":[94,188],"languages":[96],"to":[98,143,159],"improve":[99],"capabilities":[102],"facilitate":[106],"transfer.":[108],"To":[109],"end,":[111],"first":[113],"compile":[114],"SLTrans,":[115,155],"a":[116,189],"parallel":[117],"dataset":[118],"consisting":[119],"nearly":[121],"4M":[122],"self-contained":[123],"coupled":[127],"with":[128,171],"respective":[129,172],"representations.":[131],"Next,":[132],"starting":[133],"from":[134,141],"various":[135,175],"base":[136],"(ranging":[138],"in":[139],"size":[140],"1.1B":[142],"7.3B":[144],"parameters),":[145],"carry":[147],"out":[148],"continued":[149],"causal":[150],"modelling":[152],"training":[153],"forcing":[156],"(1)":[160],"learn":[161],"IR":[163,169],"(2)":[166],"align":[167],"constructs":[170,173],"languages.":[177],"Our":[178],"resulting":[179],"models,":[180],"dubbed":[181],"IRCoder,":[182],"display":[183],"sizeable":[184],"consistent":[186],"gains":[187],"wide":[190],"variety":[191],"tasks":[195],"metrics,":[197],"including":[198],"prompt":[199],"robustness,":[200],"completion,":[203],"understanding,":[205],"instruction":[207],"following.":[208]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4392576563","counts_by_year":[],"updated_date":"2025-04-18T16:46:50.272230","created_date":"2024-03-08"}