{"id":"https://openalex.org/W4404346348","doi":"https://doi.org/10.48550/arxiv.2410.22793","title":"Less is More: DocString Compression in Code Generation","display_name":"Less is More: DocString Compression in Code Generation","publication_year":2024,"publication_date":"2024-10-30","ids":{"openalex":"https://openalex.org/W4404346348","doi":"https://doi.org/10.48550/arxiv.2410.22793"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.22793","pdf_url":"http://arxiv.org/pdf/2410.22793","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2410.22793","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003840863","display_name":"Guang Yang","orcid":"https://orcid.org/0000-0002-3374-6680"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Guang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038150414","display_name":"Yu Zhou","orcid":"https://orcid.org/0000-0002-3723-7584"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Yu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100708496","display_name":"Wei Cheng","orcid":"https://orcid.org/0000-0002-2032-7815"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Wei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100362451","display_name":"Xiangyu Zhang","orcid":"https://orcid.org/0000-0001-8415-0003"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xiangyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100441911","display_name":"Xiang Chen","orcid":"https://orcid.org/0000-0002-1180-3891"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Xiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082624117","display_name":"Terry Yue Zhuo","orcid":"https://orcid.org/0000-0002-5760-5188"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhuo, Terry Yue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100390553","display_name":"Peng Liu","orcid":"https://orcid.org/0000-0002-1667-6617"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Ke","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100771025","display_name":"Xin Zhou","orcid":"https://orcid.org/0000-0003-2515-704X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081036622","display_name":"David Lo","orcid":"https://orcid.org/0000-0002-4367-7201"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lo, David","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5030747944","display_name":"Taolue Chen","orcid":"https://orcid.org/0000-0002-5993-1665"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Taolue","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":77},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11450","display_name":"Model-Driven Software Engineering Techniques","score":0.8646,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11450","display_name":"Model-Driven Software Engineering Techniques","score":0.8646,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.7524,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10126","display_name":"Logic, programming, and type systems","score":0.74,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.59693044}],"concepts":[{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.60480154},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.59693044},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.41745678},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.26371402},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.1165711},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.07159209},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.22793","pdf_url":"http://arxiv.org/pdf/2410.22793","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.22793","pdf_url":"http://arxiv.org/pdf/2410.22793","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4396701345","https://openalex.org/W4396696052","https://openalex.org/W4391913857","https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2001405890"],"abstract_inverted_index":{"The":[0,162],"widespread":[1],"use":[2],"of":[3,151,164,179],"Large":[4],"Language":[5,70],"Models":[6],"(LLMs)":[7],"in":[8,61,68],"software":[9],"engineering":[10],"has":[11],"intensified":[12],"the":[13,46,51,86,149,173,177,180,193],"need":[14],"for":[15,23,45,53,118],"improved":[16],"model":[17],"and":[18,34,48,136,171,188],"resource":[19],"efficiency.":[20],"In":[21,104],"particular,":[22],"neural":[24],"code":[25,47,77,119,126],"generation,":[26],"LLMs":[27,131],"are":[28],"used":[29,49],"to":[30,36,76,115,133,168,191],"translate":[31],"function/method":[32],"signature":[33],"DocString":[35,116],"executable":[37],"code.":[38],"DocStrings":[39],"which":[40],"capture":[41],"user":[42],"re":[43],"quirements":[44],"as":[50,96],"prompt":[52,62,88],"LLMs,":[54],"often":[55],"contains":[56],"redundant":[57],"information.":[58],"Recent":[59],"advancements":[60],"compression":[63,89,111,117,146,160],"have":[64],"shown":[65],"promising":[66],"results":[67],"Natural":[69],"Processing":[71],"(NLP),":[72],"but":[73],"their":[74],"applicability":[75],"generation":[78,127],"remains":[79],"uncertain.":[80],"Our":[81,121],"empirical":[82],"study":[83],"show":[84],"that":[85,142],"state-of-the-art":[87],"methods":[90,157],"achieve":[91],"only":[92],"about":[93],"10%":[94],"reduction,":[95],"further":[97],"reductions":[98],"would":[99],"cause":[100],"significant":[101],"performance":[102],"degradation.":[103],"our":[105],"study,":[106],"we":[107],"propose":[108],"a":[109],"novel":[110],"method,":[112],"ShortenDoc,":[113],"dedicated":[114],"generation.":[120],"extensive":[122],"experiments":[123],"on":[124],"six":[125],"datasets,":[128],"five":[129],"open-source":[130],"(1B":[132],"10B":[134],"parameters),":[135],"one":[137],"closed-source":[138],"LLM":[139],"GPT-4o":[140],"confirm":[141],"ShortenDoc":[143],"achieves":[144],"25-40%":[145],"while":[147,175],"preserving":[148],"quality":[150,178],"generated":[152,181],"code,":[153,182],"outperforming":[154],"other":[155],"baseline":[156],"at":[158],"similar":[159],"levels.":[161],"benefit":[163],"this":[165],"research":[166],"is":[167,189],"improve":[169],"efficiency":[170],"reduce":[172,192],"cost":[174,196],"maintaining":[176],"especially":[183],"when":[184],"calling":[185],"third-party":[186],"APIs,":[187],"able":[190],"token":[194],"processing":[195],"by":[197],"25-40%.":[198]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4404346348","counts_by_year":[],"updated_date":"2025-04-23T19:12:13.979759","created_date":"2024-11-14"}