{"id":"https://openalex.org/W4389519973","doi":"https://doi.org/10.18653/v1/2023.findings-emnlp.397","title":"Transformer Working Memory Enables Regular Language Reasoning And Natural Language Length Extrapolation","display_name":"Transformer Working Memory Enables Regular Language Reasoning And Natural Language Length Extrapolation","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4389519973","doi":"https://doi.org/10.18653/v1/2023.findings-emnlp.397"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2023.findings-emnlp.397","pdf_url":"https://aclanthology.org/2023.findings-emnlp.397.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://aclanthology.org/2023.findings-emnlp.397.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054133264","display_name":"Ta-Chung Chi","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ta-Chung Chi","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085851733","display_name":"Ting-Han Fan","orcid":null},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ting-Han Fan","raw_affiliation_strings":["Princeton University"],"affiliations":[{"raw_affiliation_string":"Princeton University","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040668817","display_name":"Alexander I. Rudnicky","orcid":"https://orcid.org/0000-0003-2044-8446"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexander Rudnicky","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041274293","display_name":"Peter J. Ramadge","orcid":"https://orcid.org/0000-0002-3282-216X"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Peter Ramadge","raw_affiliation_strings":["Princeton University"],"affiliations":[{"raw_affiliation_string":"Princeton University","institution_ids":["https://openalex.org/I20089843"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":67},"biblio":{"volume":null,"issue":null,"first_page":"5972","last_page":"5984"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9965,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C132459708","wikidata":"https://www.wikidata.org/wiki/Q744069","display_name":"Extrapolation","level":2,"score":0.89788383},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.69481534},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6720369},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.48074222},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.42463428},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38136715},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.36986148},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35524327},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35471636},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17399186},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.083964914},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.08253318},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.076654226},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2023.findings-emnlp.397","pdf_url":"https://aclanthology.org/2023.findings-emnlp.397.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.03796","pdf_url":"https://arxiv.org/pdf/2305.03796","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2023.findings-emnlp.397","pdf_url":"https://aclanthology.org/2023.findings-emnlp.397.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.67,"id":"https://metadata.un.org/sdg/4","display_name":"Quality education"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":38,"referenced_works":["https://openalex.org/W1485981043","https://openalex.org/W1595783387","https://openalex.org/W1805361895","https://openalex.org/W1995385330","https://openalex.org/W2124479173","https://openalex.org/W2143462372","https://openalex.org/W2519091744","https://openalex.org/W2824297639","https://openalex.org/W2866343820","https://openalex.org/W2964110616","https://openalex.org/W2972324944","https://openalex.org/W2981757109","https://openalex.org/W2996428491","https://openalex.org/W3014096773","https://openalex.org/W3015468748","https://openalex.org/W3034561418","https://openalex.org/W3098666169","https://openalex.org/W3157700644","https://openalex.org/W3185020352","https://openalex.org/W4221143046","https://openalex.org/W4232424139","https://openalex.org/W4242290571","https://openalex.org/W4254816979","https://openalex.org/W4281381535","https://openalex.org/W4284701759","https://openalex.org/W4285206226","https://openalex.org/W4286903975","https://openalex.org/W4287019748","https://openalex.org/W4288089799","https://openalex.org/W4290802752","https://openalex.org/W4303633609","https://openalex.org/W4310625358","https://openalex.org/W4319991279","https://openalex.org/W4320087317","https://openalex.org/W4324312887","https://openalex.org/W4385245566","https://openalex.org/W4385570109","https://openalex.org/W4385572968"],"related_works":["https://openalex.org/W4288365749","https://openalex.org/W4287826556","https://openalex.org/W4287598411","https://openalex.org/W3198458223","https://openalex.org/W3126642501","https://openalex.org/W3100913109","https://openalex.org/W3098382480","https://openalex.org/W3094871513","https://openalex.org/W3013624417","https://openalex.org/W2936497627"],"abstract_inverted_index":{"Unlike":[0],"recurrent":[1],"models,":[2],"conventional":[3],"wisdom":[4],"has":[5],"it":[6,74],"that":[7,73],"Transformers":[8],"cannot":[9],"perfectly":[10],"model":[11],"regular":[12,53],"languages.":[13],"Inspired":[14],"by":[15],"the":[16,43,63,76],"notion":[17],"of":[18,33,52,65],"working":[19,40],"memory,":[20],"we":[21],"propose":[22],"a":[23],"new":[24],"Transformer":[25],"variant":[26],"named":[27],"RegularGPT.":[28],"With":[29],"its":[30],"novel":[31],"combination":[32],"Weight-Sharing,":[34],"Adaptive-Depth,":[35],"and":[36,49,70],"Sliding-Dilated-Attention,":[37],"RegularGPT":[38,61],"constructs":[39],"memory":[41],"along":[42],"depth":[44],"dimension,":[45],"thereby":[46],"enabling":[47],"efficient":[48],"successful":[50],"modeling":[51],"languages":[54],"such":[55],"as":[56],"PARITY.":[57],"We":[58],"further":[59],"test":[60],"on":[62],"task":[64],"natural":[66],"language":[67],"length":[68,87],"extrapolation":[69],"surprisingly":[71],"find":[72],"rediscovers":[75],"local":[77],"windowed":[78],"attention":[79],"effect":[80],"deemed":[81],"necessary":[82],"in":[83],"prior":[84],"work":[85],"for":[86],"extrapolation.":[88]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4389519973","counts_by_year":[],"updated_date":"2025-01-06T21:31:59.994070","created_date":"2023-12-11"}