{"id":"https://openalex.org/W4308761000","doi":"https://doi.org/10.48550/arxiv.2211.05110","title":"Large Language Models with Controllable Working Memory","display_name":"Large Language Models with Controllable Working Memory","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4308761000","doi":"https://doi.org/10.48550/arxiv.2211.05110"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2211.05110","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2211.05110","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017466259","display_name":"Daliang Li","orcid":"https://orcid.org/0000-0003-1751-7084"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Daliang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079621556","display_name":"Ankit Singh Rawat","orcid":"https://orcid.org/0000-0001-9790-6500"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rawat, Ankit Singh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102824470","display_name":"Manzil Zaheer","orcid":"https://orcid.org/0009-0004-9935-7583"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zaheer, Manzil","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100327839","display_name":"Xin Wang","orcid":"https://orcid.org/0000-0001-8246-0606"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083277438","display_name":"Micha\u0142 \u0141ukasik","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lukasik, Michal","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081627147","display_name":"Andreas Veit","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Veit, Andreas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111045439","display_name":"Felix Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Felix","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5018301052","display_name":"Sanjiv Kumar","orcid":"https://orcid.org/0000-0002-4080-1414"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kumar, Sanjiv","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.60916,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":75,"max":78},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9957,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9504,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness","score":0.74642193},{"id":"https://openalex.org/keywords/retraining","display_name":"Retraining","score":0.5053585},{"id":"https://openalex.org/keywords/context-model","display_name":"Context model","score":0.42164207}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.81111634},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.74642193},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6025783},{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.5487354},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5410797},{"id":"https://openalex.org/C48209547","wikidata":"https://www.wikidata.org/wiki/Q1331104","display_name":"Controllability","level":2,"score":0.53732735},{"id":"https://openalex.org/C2778712577","wikidata":"https://www.wikidata.org/wiki/Q3505966","display_name":"Retraining","level":2,"score":0.5053585},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.4696224},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.45114705},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.44698334},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.42164207},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34624854},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3366168},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.119360715},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.09775028},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C155202549","wikidata":"https://www.wikidata.org/wiki/Q178803","display_name":"International trade","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2211.05110","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2211.05110","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2211.05110","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.52,"id":"https://metadata.un.org/sdg/4","display_name":"Quality education"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W68335373","https://openalex.org/W4385890381","https://openalex.org/W4384133558","https://openalex.org/W4286970243","https://openalex.org/W3201448254","https://openalex.org/W3096664139","https://openalex.org/W3025615835","https://openalex.org/W2402231715","https://openalex.org/W2066431708","https://openalex.org/W2036697162"],"abstract_inverted_index":{"Large":[0],"language":[1,13],"models":[2,29],"(LLMs)":[3],"have":[4],"led":[5],"to":[6,17,53,87,106,117,134,211,223],"a":[7,79,154,198,202],"series":[8],"of":[9,35,158,170,233],"breakthroughs":[10],"in":[11,72,109,167],"natural":[12],"processing":[14],"(NLP),":[15],"owing":[16],"their":[18],"excellent":[19],"understanding":[20],"and":[21,142,165,177,181,187,215,220,238],"generation":[22],"abilities.":[23],"Remarkably,":[24],"what":[25],"further":[26],"sets":[27],"these":[28],"apart":[30],"is":[31,132],"the":[32,47,58,62,68,73,88,98,110,130,135,137,159,168,231],"massive":[33],"amounts":[34],"world":[36,64],"knowledge":[37,65],"they":[38],"internalize":[39],"during":[40],"pretraining.":[41],"While":[42],"many":[43],"downstream":[44],"applications":[45],"provide":[46],"model":[48,104,122,138,195,236],"with":[49,67,97,193],"an":[50,82],"informational":[51],"context":[52,74,89,131,169],"aid":[54],"its":[55,146],"performance":[56],"on":[57,145],"underlying":[59],"task,":[60,136],"how":[61],"model's":[63,99],"interacts":[66],"factual":[69],"information":[70,94],"presented":[71],"remains":[75],"under":[76],"explored.":[77],"As":[78,197],"desirable":[80],"behavior,":[81],"LLM":[83],"should":[84,139],"give":[85],"precedence":[86],"whenever":[90],"it":[91,141],"contains":[92],"task-relevant":[93],"that":[95,174],"conflicts":[96],"memorized":[100],"knowledge.":[101,148],"This":[102],"enables":[103],"predictions":[105,123],"be":[107,115],"grounded":[108],"context,":[111],"which":[112,189],"can":[113],"then":[114],"used":[116],"update":[118],"or":[119],"correct":[120],"specific":[121],"without":[124],"frequent":[125],"retraining.":[126],"By":[127],"contrast,":[128],"when":[129],"irrelevant":[133,221],"ignore":[140],"fall":[143],"back":[144],"internal":[147],"In":[149],"this":[150],"paper,":[151],"we":[152,200],"undertake":[153],"first":[155],"joint":[156],"study":[157],"aforementioned":[160],"two":[161],"properties,":[162],"namely":[163],"controllability":[164,186,214],"robustness,":[166,188],"LLMs.":[171],"We":[172],"demonstrate":[173],"state-of-the-art":[175],"T5":[176],"PaLM":[178],"(both":[179],"pretrained":[180],"finetuned)":[182],"could":[183],"exhibit":[184],"poor":[185],"do":[190],"not":[191],"scale":[192],"increasing":[194],"size.":[196],"solution,":[199],"propose":[201],"novel":[203],"method":[204],"-":[205,210],"Knowledge":[206],"Aware":[207],"FineTuning":[208],"(KAFT)":[209],"strengthen":[212],"both":[213],"robustness":[216],"by":[217],"incorporating":[218],"counterfactual":[219],"contexts":[222],"standard":[224],"supervised":[225],"datasets.":[226],"Our":[227],"comprehensive":[228],"evaluation":[229],"showcases":[230],"utility":[232],"KAFT":[234],"across":[235],"architectures":[237],"sizes.":[239]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4308761000","counts_by_year":[{"year":2023,"cited_by_count":3}],"updated_date":"2025-04-24T03:54:35.390566","created_date":"2022-11-15"}