{"id":"https://openalex.org/W4403575158","doi":"https://doi.org/10.48550/arxiv.2410.11744","title":"DySpec: Faster Speculative Decoding with Dynamic Token Tree Structure","display_name":"DySpec: Faster Speculative Decoding with Dynamic Token Tree Structure","publication_year":2024,"publication_date":"2024-10-15","ids":{"openalex":"https://openalex.org/W4403575158","doi":"https://doi.org/10.48550/arxiv.2410.11744"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.11744","pdf_url":"http://arxiv.org/pdf/2410.11744","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2410.11744","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055302542","display_name":"Yunkui Xiong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiong, Yunfan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020784751","display_name":"Ruoyu Zhang","orcid":"https://orcid.org/0000-0001-8284-7034"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Ruoyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101424236","display_name":"Yanzeng Li","orcid":"https://orcid.org/0000-0003-4880-5804"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yanzeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101908971","display_name":"Tianhao Wu","orcid":"https://orcid.org/0000-0003-3634-078X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Tianhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5033785339","display_name":"Lei Zou","orcid":"https://orcid.org/0000-0002-8586-4400"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zou, Lei","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11130","display_name":"Coding theory and cryptography","score":0.999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11130","display_name":"Coding theory and cryptography","score":0.999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10951","display_name":"Cryptographic Implementations and Security","score":0.9985,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.995,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.58201504},{"id":"https://openalex.org/keywords/token-passing","display_name":"Token passing","score":0.47420624}],"concepts":[{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.78428996},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.71558356},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.58201504},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5583526},{"id":"https://openalex.org/C115067241","wikidata":"https://www.wikidata.org/wiki/Q1639854","display_name":"Token passing","level":3,"score":0.47420624},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.23675346},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.23645857},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.230721},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.079524755}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.11744","pdf_url":"http://arxiv.org/pdf/2410.11744","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.11744","pdf_url":"http://arxiv.org/pdf/2410.11744","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4389471172","https://openalex.org/W2765256135","https://openalex.org/W2540135243","https://openalex.org/W2397526281","https://openalex.org/W2348435129","https://openalex.org/W2136545404","https://openalex.org/W2074350650","https://openalex.org/W2057608425","https://openalex.org/W2016681143","https://openalex.org/W2013402399"],"abstract_inverted_index":{"While":[0],"speculative":[1,60],"decoding":[2,61],"has":[3],"recently":[4],"appeared":[5],"as":[6,37],"a":[7,58,64,100,128],"promising":[8],"direction":[9],"for":[10,212],"accelerating":[11],"the":[12,19,27,74,89,106,141,145,173,180,195,201],"inference":[13],"of":[14,147,204],"large":[15],"language":[16],"models":[17],"(LLMs),":[18],"speedup":[20,133],"and":[21,77,82,85,132,143,154,164,178],"scalability":[22],"are":[23,92],"strongly":[24,93],"bounded":[25],"by":[26,72],"token":[28,42,67,107,148,209],"acceptance":[29,78,130],"rate.":[30],"Prevalent":[31],"methods":[32],"usually":[33],"organize":[34],"predicted":[35],"tokens":[36],"independent":[38],"chains":[39],"or":[40],"fixed":[41,135],"trees,":[43],"which":[44,157],"fails":[45],"to":[46,48,103,176,183,198],"generalize":[47],"diverse":[49],"query":[50],"distributions.":[51],"In":[52],"this":[53],"paper,":[54],"we":[55,98,113],"propose":[56],"DySpec,":[57],"faster":[59],"algorithm":[62],"with":[63],"novel":[65],"dynamic":[66],"tree":[68,108],"structure.":[69],"We":[70],"begin":[71],"bridging":[73],"draft":[75,213],"distribution":[76,153],"rate":[79,131],"from":[80],"intuitive":[81],"empirical":[83],"clues,":[84],"successfully":[86],"show":[87,114],"that":[88,115],"two":[90],"variables":[91],"correlated.":[94],"Based":[95],"on":[96,185],"this,":[97],"employ":[99],"greedy":[101],"strategy":[102],"dynamically":[104],"expand":[105],"at":[109],"run":[110],"time.":[111],"Theoretically,":[112],"our":[116],"method":[117],"can":[118,138,171,192],"achieve":[119],"optimal":[120],"results":[121],"under":[122],"mild":[123],"assumptions.":[124],"Empirically,":[125],"DySpec":[126,137,170,191],"yields":[127],"higher":[129],"than":[134,207],"trees.":[136],"drastically":[139],"improve":[140,172,194],"throughput":[142,174,196],"reduce":[144,179],"latency":[146,181],"generation":[149],"across":[150],"various":[151],"data":[152],"model":[155],"sizes,":[156],"significantly":[158],"outperforms":[159],"strong":[160],"competitors,":[161],"including":[162],"Specinfer":[163],"Sequoia.":[165],"Under":[166,187],"low":[167],"temperature":[168,189],"setting,":[169,190],"up":[175,182,197],"9.1$\\times$":[177],"9.4$\\times$":[184],"Llama2-70B.":[186],"high":[188],"also":[193],"6.21$\\times$,":[199],"despite":[200],"increasing":[202],"difficulty":[203],"speculating":[205],"more":[206],"one":[208],"per":[210],"step":[211],"model.":[214]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403575158","counts_by_year":[],"updated_date":"2024-12-24T01:58:31.153522","created_date":"2024-10-20"}