{"id":"https://openalex.org/W4403964211","doi":"https://doi.org/10.48550/arxiv.2410.04798","title":"DAPE V2: Process Attention Score as Feature Map for Length Extrapolation","display_name":"DAPE V2: Process Attention Score as Feature Map for Length Extrapolation","publication_year":2024,"publication_date":"2024-10-07","ids":{"openalex":"https://openalex.org/W4403964211","doi":"https://doi.org/10.48550/arxiv.2410.04798"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.04798","pdf_url":"http://arxiv.org/pdf/2410.04798","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2410.04798","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114047453","display_name":"Chuanyang Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Chuanyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042209803","display_name":"Yihang Gao","orcid":"https://orcid.org/0000-0001-8883-7769"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Yihang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100306406","display_name":"Han Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Han","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034530775","display_name":"Jing Xiong","orcid":"https://orcid.org/0000-0002-9712-6771"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiong, Jing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101424691","display_name":"Jiankai Sun","orcid":"https://orcid.org/0000-0002-7214-0665"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Jiankai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100739865","display_name":"Jingyao Li","orcid":"https://orcid.org/0000-0003-1498-5501"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jingyao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052117191","display_name":"Minbin Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Minbin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079912837","display_name":"Xiaozhe Ren","orcid":"https://orcid.org/0000-0002-0432-5510"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ren, Xiaozhe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010561682","display_name":"Michael K. Ng","orcid":"https://orcid.org/0000-0001-6833-5227"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ng, Michael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021073698","display_name":"Xin Jiang","orcid":"https://orcid.org/0000-0003-2670-3188"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077862962","display_name":"Zhenguo Li","orcid":"https://orcid.org/0009-0003-6369-0213"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zhenguo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5115595857","display_name":"Yu Li","orcid":"https://orcid.org/0000-0002-7670-3998"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yu","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":80},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11159","display_name":"Manufacturing Process and Optimization","score":0.9845,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11159","display_name":"Manufacturing Process and Optimization","score":0.9845,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9726,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11443","display_name":"Advanced Statistical Process Monitoring","score":0.9485,"subfield":{"id":"https://openalex.org/subfields/1804","display_name":"Statistics, Probability and Uncertainty"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6582334}],"concepts":[{"id":"https://openalex.org/C132459708","wikidata":"https://www.wikidata.org/wiki/Q744069","display_name":"Extrapolation","level":2,"score":0.79267293},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6582334},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.54074717},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.45691213},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42307624},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34968388},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.32444483},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.2776994},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.09843817},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.070862204},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.062005997}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.04798","pdf_url":"http://arxiv.org/pdf/2410.04798","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.04798","pdf_url":"http://arxiv.org/pdf/2410.04798","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4361730764","https://openalex.org/W4296478327","https://openalex.org/W2387596242","https://openalex.org/W2220129715","https://openalex.org/W2042397106","https://openalex.org/W2042327336","https://openalex.org/W2033914206","https://openalex.org/W1968270095","https://openalex.org/W1965029248","https://openalex.org/W1960072520"],"abstract_inverted_index":{"The":[0,143],"attention":[1,27,50,72,84,171],"mechanism":[2],"is":[3,104],"a":[4,74,114,137,173,180],"fundamental":[5],"component":[6],"of":[7,45,65,101,116,120],"the":[8,26,33,57,63,79,91,98,108,117,121,132,156,161],"Transformer":[9,109,158,185],"model,":[10],"contributing":[11],"to":[12,19,89,150],"interactions":[13],"among":[14],"distinct":[15],"tokens,":[16],"in":[17,94],"contrast":[18],"earlier":[20],"feed-forward":[21],"neural":[22],"networks.":[23],"In":[24,67],"general,":[25],"scores":[28,51,85],"are":[29],"determined":[30],"simply":[31],"by":[32],"key-query":[34,59],"products.":[35],"However,":[36],"this":[37,68,102],"work's":[38],"occasional":[39],"trial":[40],"(combining":[41],"DAPE":[42],"and":[43,77,106,124,128,176],"NoPE)":[44],"including":[46],"additional":[47],"MLPs":[48],"on":[49],"without":[52],"position":[53],"encoding":[54],"indicates":[55],"that":[56,155,169],"classical":[58],"multiplication":[60],"may":[61],"limit":[62],"performance":[64],"Transformers.":[66],"work,":[69],"we":[70,129],"conceptualize":[71],"as":[73,113,172,179],"feature":[75,139,174],"map":[76,140,175],"apply":[78],"convolution":[80,178],"operator":[81],"(for":[82],"neighboring":[83],"across":[86],"different":[87],"heads)":[88],"mimic":[90],"processing":[92,141,181],"methods":[93],"computer":[95],"vision.":[96],"Specifically,":[97],"main":[99],"contribution":[100],"paper":[103],"identifying":[105],"interpreting":[107],"length":[110,133],"extrapolation":[111,134],"problem":[112],"result":[115],"limited":[118],"expressiveness":[119],"naive":[122],"query":[123],"key":[125],"dot":[126],"product,":[127],"successfully":[130],"translate":[131],"issue":[135],"into":[136],"well-understood":[138],"problem.":[142],"novel":[144],"insight,":[145],"which":[146],"can":[147],"be":[148],"adapted":[149],"various":[151],"attention-related":[152],"models,":[153],"reveals":[154],"current":[157],"architecture":[159],"has":[160],"potential":[162],"for":[163],"further":[164],"evolution.":[165],"Extensive":[166],"experiments":[167],"demonstrate":[168],"treating":[170],"applying":[177],"method":[182],"significantly":[183],"enhances":[184],"performance.":[186]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403964211","counts_by_year":[],"updated_date":"2025-02-27T08:28:13.255665","created_date":"2024-11-01"}