{"id":"https://openalex.org/W4403965514","doi":"https://doi.org/10.48550/arxiv.2408.03675","title":"NACL: A General and Effective KV Cache Eviction Framework for LLMs at\n Inference Time","display_name":"NACL: A General and Effective KV Cache Eviction Framework for LLMs at\n Inference Time","publication_year":2024,"publication_date":"2024-08-07","ids":{"openalex":"https://openalex.org/W4403965514","doi":"https://doi.org/10.48550/arxiv.2408.03675"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.03675","pdf_url":"http://arxiv.org/pdf/2408.03675","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2408.03675","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005755293","display_name":"Yilong Chen","orcid":"https://orcid.org/0009-0002-0732-3323"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yilong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049245418","display_name":"Guoxia Wang","orcid":"https://orcid.org/0000-0002-3145-3892"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Guoxia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108988113","display_name":"Junyuan Shang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shang, Junyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028351629","display_name":"Shiyao Cui","orcid":"https://orcid.org/0009-0003-2333-1064"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cui, Shiyao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100389498","display_name":"Zhenyu Zhang","orcid":"https://orcid.org/0000-0001-5570-090X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhenyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080686378","display_name":"Tingwen Liu","orcid":"https://orcid.org/0000-0002-0750-6923"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Tingwen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071055669","display_name":"Shuohuan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Shuohuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115595077","display_name":"Yu Sun","orcid":"https://orcid.org/0000-0002-5364-2387"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Yu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084155236","display_name":"Dianhai Yu","orcid":"https://orcid.org/0000-0002-0163-2603"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Dianhai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5032328549","display_name":"Hua Wu","orcid":"https://orcid.org/0000-0002-8829-0182"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Hua","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":82},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T13999","display_name":"Digital Rights Management and Security","score":0.831,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13999","display_name":"Digital Rights Management and Security","score":0.831,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13643","display_name":"Artificial Intelligence in Law","score":0.7753,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11653","display_name":"Financial Distress and Bankruptcy Prediction","score":0.6957,"subfield":{"id":"https://openalex.org/subfields/1402","display_name":"Accounting"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/eviction","display_name":"Eviction","score":0.8000496}],"concepts":[{"id":"https://openalex.org/C2778227907","wikidata":"https://www.wikidata.org/wiki/Q1893186","display_name":"Eviction","level":2,"score":0.8000496},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.7631633},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.75515753},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4762057},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3735696},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.35122958},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.2513969},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.13980073},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.074502766}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.03675","pdf_url":"http://arxiv.org/pdf/2408.03675","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.03675","pdf_url":"http://arxiv.org/pdf/2408.03675","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391680691","https://openalex.org/W4280511435","https://openalex.org/W3184996973","https://openalex.org/W3152917484","https://openalex.org/W3147963612","https://openalex.org/W3133572098","https://openalex.org/W3114967662","https://openalex.org/W3013818329","https://openalex.org/W2602706871","https://openalex.org/W2257631059"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"have":[4],"ignited":[5],"an":[6],"innovative":[7],"surge":[8],"of":[9,16,37,56,64,130,138],"AI":[10],"applications,":[11],"marking":[12],"a":[13,86,102],"new":[14],"era":[15],"exciting":[17],"possibilities":[18],"equipped":[19],"with":[20,124,177],"extended":[21],"context":[22],"windows.":[23],"However,":[24],"hosting":[25],"these":[26],"models":[27],"is":[28,184],"cost-prohibitive":[29],"mainly":[30],"due":[31],"to":[32,47,110,134,175],"the":[33,52,60,106,125,136,143,158],"extensive":[34],"memory":[35],"consumption":[36],"KV":[38,53,91,171],"Cache":[39,172],"involving":[40],"long-context":[41,90,150],"modeling.":[42],"Despite":[43],"several":[44],"works":[45],"proposing":[46],"evict":[48],"unnecessary":[49],"tokens":[50,148],"from":[51],"Cache,":[54],"most":[55],"them":[57],"rely":[58],"on":[59,76,160],"biased":[61],"local":[62],"statistics":[63,119],"accumulated":[65],"attention":[66,117,139],"scores":[67],"and":[68,98,141,162,167],"report":[69],"performance":[70,159,180],"using":[71],"unconvincing":[72],"metric":[73],"like":[74],"perplexity":[75],"inadequate":[77],"short-text":[78],"evaluation.":[79],"In":[80],"this":[81],"paper,":[82],"we":[83,113],"propose":[84],"NACL,":[85],"general":[87],"framework":[88],"for":[89,149],"cache":[92],"eviction":[93,100,128],"that":[94],"achieves":[95],"more":[96,115],"optimal":[97],"efficient":[99],"in":[101,120,145],"single":[103],"operation":[104],"during":[105],"encoding":[107],"phase.":[108],"Due":[109],"NACL's":[111],"efficiency,":[112],"combine":[114],"accurate":[116],"score":[118],"PROXY":[121],"TOKENS":[122],"EVICTION":[123],"diversified":[126],"random":[127],"strategy":[129],"RANDOM":[131],"EVICTION,":[132],"aiming":[133],"alleviate":[135],"issue":[137],"bias":[140],"enhance":[142],"robustness":[144],"maintaining":[146],"pivotal":[147],"modeling":[151],"tasks.":[152],"Notably,":[153],"our":[154],"method":[155],"significantly":[156],"improves":[157],"short-":[161],"long-text":[163],"tasks":[164],"by":[165,173],"80%":[166],"76%":[168],"respectively,":[169],"reducing":[170],"up":[174],"50%":[176],"over":[178],"95%":[179],"maintenance.":[181],"The":[182],"code":[183],"available":[185],"at":[186],"https://github.com/PaddlePaddle/Research/tree/master/NLP/ACL2024-NACL.":[187]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403965514","counts_by_year":[],"updated_date":"2025-01-22T10:22:33.798137","created_date":"2024-11-01"}