{"id":"https://openalex.org/W4399447719","doi":"https://doi.org/10.48550/arxiv.2406.03030","title":"From Tarzan to Tolkien: Controlling the Language Proficiency Level of\n LLMs for Content Generation","display_name":"From Tarzan to Tolkien: Controlling the Language Proficiency Level of\n LLMs for Content Generation","publication_year":2024,"publication_date":"2024-06-05","ids":{"openalex":"https://openalex.org/W4399447719","doi":"https://doi.org/10.48550/arxiv.2406.03030"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2406.03030","pdf_url":"https://arxiv.org/pdf/2406.03030","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2406.03030","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041464552","display_name":"Ali Ahmad Malik","orcid":"https://orcid.org/0000-0001-7191-3614"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Malik, Ali","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014763831","display_name":"Stephen Mayhew","orcid":"https://orcid.org/0000-0003-1240-1488"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mayhew, Stephen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074969309","display_name":"Chris Piech","orcid":"https://orcid.org/0000-0001-5140-0467"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Piech, Chris","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5029982656","display_name":"Klinton Bicknell","orcid":"https://orcid.org/0000-0003-3404-7432"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bicknell, Klinton","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":80},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9861,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9861,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.9628,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/content","display_name":"Content (measure theory)","score":0.648462}],"concepts":[{"id":"https://openalex.org/C2778152352","wikidata":"https://www.wikidata.org/wiki/Q5165061","display_name":"Content (measure theory)","level":2,"score":0.648462},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.39537925},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.35202706},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13598835},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.12569597},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2406.03030","pdf_url":"https://arxiv.org/pdf/2406.03030","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2406.03030","pdf_url":"https://arxiv.org/pdf/2406.03030","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W4391301621","https://openalex.org/W2931662336","https://openalex.org/W2765597752","https://openalex.org/W2748952813","https://openalex.org/W2134894512","https://openalex.org/W2085372204","https://openalex.org/W2083375246","https://openalex.org/W2077865380","https://openalex.org/W2067108088"],"abstract_inverted_index":{"We":[0,121],"study":[1],"the":[2,6,35,74,107,119,124],"problem":[3],"of":[4,9,37,94,109,118,126],"controlling":[5],"difficulty":[7],"level":[8],"text":[10],"generated":[11],"by":[12],"Large":[13],"Language":[14,104],"Models":[15],"(LLMs)":[16],"for":[17,41],"contexts":[18],"where":[19],"end-users":[20],"are":[21],"not":[22],"fully":[23],"proficient,":[24],"such":[25],"as":[26],"language":[27],"learners.":[28],"Using":[29],"a":[30,67,91,116,130],"novel":[31],"framework,":[32],"we":[33,83],"evaluate":[34],"effectiveness":[36],"several":[38],"key":[39],"approaches":[40],"this":[42,88],"task,":[43],"including":[44],"few-shot":[45],"prompting,":[46],"supervised":[47],"finetuning,":[48],"and":[49,56,62,73,96,111],"reinforcement":[50],"learning":[51],"(RL),":[52],"utilising":[53],"both":[54],"GPT-4":[55,72,110],"open":[57,75],"source":[58,76],"alternatives":[59],"like":[60],"LLama2-7B":[61],"Mistral-7B.":[63],"Our":[64,99],"findings":[65],"reveal":[66],"large":[68],"performance":[69,108],"gap":[70,89],"between":[71],"models":[77],"when":[78],"using":[79],"prompt-based":[80],"strategies.":[81],"However,":[82],"show":[84],"how":[85],"to":[86],"bridge":[87],"with":[90],"careful":[92],"combination":[93],"finetuning":[95],"RL":[97],"alignment.":[98],"best":[100],"model,":[101],"CALM":[102],"(CEFR-Aligned":[103],"Model),":[105],"surpasses":[106],"other":[112],"strategies,":[113],"at":[114],"only":[115],"fraction":[117],"cost.":[120],"further":[122],"validate":[123],"quality":[125],"our":[127],"results":[128],"through":[129],"small-scale":[131],"human":[132],"study.":[133]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4399447719","counts_by_year":[],"updated_date":"2025-02-26T13:54:32.981285","created_date":"2024-06-08"}