{"id":"https://openalex.org/W4390098020","doi":"https://doi.org/10.1109/iccd58817.2023.00068","title":"Hardware-Software Co-Design for Content-Based Sparse Attention","display_name":"Hardware-Software Co-Design for Content-Based Sparse Attention","publication_year":2023,"publication_date":"2023-11-06","ids":{"openalex":"https://openalex.org/W4390098020","doi":"https://doi.org/10.1109/iccd58817.2023.00068"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccd58817.2023.00068","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100655714","display_name":"Rui Tang","orcid":"https://orcid.org/0000-0002-2734-1003"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Tang","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","School of Microelectronics, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Microelectronics, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100419434","display_name":"Xiaoyu Zhang","orcid":"https://orcid.org/0000-0003-1630-6058"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoyu Zhang","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100448519","display_name":"Rui Liu","orcid":"https://orcid.org/0000-0002-6515-652X"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4610292","display_name":"Xiangtan University","ror":"https://ror.org/00xsfaz62","country_code":"CN","type":"education","lineage":["https://openalex.org/I4610292"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Liu","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","School of Materials Science and Engineering, Xiangtan University, Xiangtan, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Materials Science and Engineering, Xiangtan University, Xiangtan, China","institution_ids":["https://openalex.org/I4610292"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102658018","display_name":"Zhejian Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I4610292","display_name":"Xiangtan University","ror":"https://ror.org/00xsfaz62","country_code":"CN","type":"education","lineage":["https://openalex.org/I4610292"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhejian Luo","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","School of Materials Science and Engineering, Xiangtan University, Xiangtan, China"],"affiliations":[{"raw_affiliation_string":"School of Materials Science and Engineering, Xiangtan University, Xiangtan, China","institution_ids":["https://openalex.org/I4610292"]},{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100420361","display_name":"Xiaoming Chen","orcid":"https://orcid.org/0000-0002-7337-1844"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoming Chen","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016864694","display_name":"Yinhe Han","orcid":"https://orcid.org/0000-0003-0904-6681"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yinhe Han","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":68},"biblio":{"volume":null,"issue":null,"first_page":"415","last_page":"418"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9989,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9989,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9987,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9965,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.70099676}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8826617},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.7286546},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.70099676},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5545402},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.5275987},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.52671057},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4551856},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4214837},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.41882297},{"id":"https://openalex.org/C65232700","wikidata":"https://www.wikidata.org/wiki/Q5656403","display_name":"Hardware architecture","level":3,"score":0.41729376},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.34636056},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.22796142},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.22701636},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.21469527},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08335891},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccd58817.2023.00068","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.65,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"grants":[{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":null}],"datasets":[],"versions":[],"referenced_works_count":8,"referenced_works":["https://openalex.org/W1968677765","https://openalex.org/W2017369466","https://openalex.org/W2896457183","https://openalex.org/W2979826702","https://openalex.org/W3131922516","https://openalex.org/W4312121036","https://openalex.org/W4323654151","https://openalex.org/W980713158"],"related_works":["https://openalex.org/W98480971","https://openalex.org/W2891987081","https://openalex.org/W2164382479","https://openalex.org/W2157978810","https://openalex.org/W2150291671","https://openalex.org/W2146343568","https://openalex.org/W2097707447","https://openalex.org/W2058965144","https://openalex.org/W2027972911","https://openalex.org/W2013643406"],"abstract_inverted_index":{"Attention-based":[0],"pre-trained":[1],"large":[2],"models":[3],"have":[4],"demonstrated":[5],"impressive":[6],"performance":[7],"in":[8],"many":[9],"domains":[10],"such":[11],"as":[12,70,72],"natural":[13],"language":[14],"processing":[15,35],"and":[16,151],"computer":[17],"vision.":[18],"Unfortunately,":[19],"due":[20],"to":[21,104,125],"the":[22,31,36,40,45,51,76,81,93,136],"quadratic":[23],"complexity":[24,111],"incurred":[25],"by":[26,80],"calculating":[27],"pairwise":[28],"correlations":[29],"across":[30],"entire":[32],"input":[33],"sequence,":[34],"attention":[37,52,77,123],"mechanism":[38,53,124],"becomes":[39],"arguably":[41],"major":[42],"bottleneck":[43],"of":[44,57,88],"whole":[46],"inference":[47],"execution.":[48],"To":[49],"accelerate":[50],"with":[54,92,135,155],"no":[55,156],"loss":[56],"accuracy,":[58],"we":[59,97,114],"present":[60],"a":[61,85,99,116],"novel":[62],"algorithm-architecture":[63],"co-design":[64],"that":[65,83],"can":[66],"substantially":[67],"save":[68],"runtime":[69],"well":[71],"energy":[73,153],"spent":[74],"on":[75,139,144],"mechanism.":[78],"Inspired":[79],"observation":[82],"only":[84],"small":[86],"subset":[87],"content":[89],"highly":[90],"correlates":[91],"others":[94],"under":[95],"attention,":[96],"devise":[98],"hardware-friendly":[100],"content-based":[101,121],"sparsity":[102],"scheme":[103],"eliminate":[105],"unnecessary":[106],"relations,":[107],"thus":[108],"reducing":[109],"computation":[110],"effectively.":[112],"Furthermore,":[113],"develop":[115],"tailored":[117],"hardware":[118],"for":[119],"this":[120,128],"sparse":[122],"best":[126],"utilize":[127],"algorithm":[129],"innovation.":[130],"Experiments":[131],"show":[132],"that,":[133],"compared":[134],"implementation":[137],"based":[138],"an":[140],"Nvidia":[141],"V100-SXM2":[142],"GPU,":[143],"average,":[145],"our":[146],"design":[147],"achieves":[148],"63\u00d7":[149],"speedup":[150],"505\u00d7":[152],"saving":[154],"accuracy":[157],"loss.":[158]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4390098020","counts_by_year":[],"updated_date":"2024-12-12T14:13:54.155454","created_date":"2023-12-23"}