{"id":"https://openalex.org/W4391827186","doi":"https://doi.org/10.1109/tc.2024.3365942","title":"Accelerating Sparse DNNs Based on Tiled GEMM","display_name":"Accelerating Sparse DNNs Based on Tiled GEMM","publication_year":2024,"publication_date":"2024-02-14","ids":{"openalex":"https://openalex.org/W4391827186","doi":"https://doi.org/10.1109/tc.2024.3365942"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2024.3365942","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2402.10876","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101706991","display_name":"Cong Guo","orcid":"https://orcid.org/0000-0002-4479-5525"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cong Guo","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102007698","display_name":"Fengchen Xue","orcid":"https://orcid.org/0009-0001-6080-5703"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fengchen Xue","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003939279","display_name":"Jingwen Leng","orcid":"https://orcid.org/0000-0002-5660-5493"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingwen Leng","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037125614","display_name":"Yuxian Qiu","orcid":"https://orcid.org/0000-0003-4040-0159"},"institutions":[],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxian Qiu","raw_affiliation_strings":["NVIDIA, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091217469","display_name":"Yue Guan","orcid":"https://orcid.org/0009-0005-7433-2627"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yue Guan","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008837660","display_name":"Weihao Cui","orcid":"https://orcid.org/0000-0002-6646-5260"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weihao Cui","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100377840","display_name":"Quan Chen","orcid":"https://orcid.org/0000-0001-5832-0347"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quan Chen","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039318240","display_name":"Minyi Guo","orcid":"https://orcid.org/0000-0003-0034-2302"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minyi Guo","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.463,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.999951,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":93,"max":96},"biblio":{"volume":"73","issue":"5","first_page":"1275","last_page":"1289"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11609","display_name":"Geophysical Methods and Applications","score":0.9942,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9901,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.69992644},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.54567915},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.44447458},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3954752},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.109261245},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2024.3365942","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2402.10876","pdf_url":"https://arxiv.org/pdf/2402.10876","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2402.10876","pdf_url":"https://arxiv.org/pdf/2402.10876","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.42,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"grants":[{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"62222210"},{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"U21B2017"},{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"62072297"},{"funder":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China","award_id":"2021ZD0110104"}],"datasets":[],"versions":[],"referenced_works_count":30,"referenced_works":["https://openalex.org/W2101105183","https://openalex.org/W2194775991","https://openalex.org/W2606722458","https://openalex.org/W2618530766","https://openalex.org/W2707890836","https://openalex.org/W2765315405","https://openalex.org/W2767785892","https://openalex.org/W2896457183","https://openalex.org/W2899244816","https://openalex.org/W2923014074","https://openalex.org/W2954698171","https://openalex.org/W2963324947","https://openalex.org/W2963748441","https://openalex.org/W2965862774","https://openalex.org/W2980186997","https://openalex.org/W3016542674","https://openalex.org/W3017746288","https://openalex.org/W3092319711","https://openalex.org/W3132616766","https://openalex.org/W3187908937","https://openalex.org/W3205706264","https://openalex.org/W3210432446","https://openalex.org/W3211878177","https://openalex.org/W4221162983","https://openalex.org/W4293023328","https://openalex.org/W4302296459","https://openalex.org/W4308083739","https://openalex.org/W4366341968","https://openalex.org/W4384705353","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W4391913857","https://openalex.org/W4391375266","https://openalex.org/W3202552726","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2279642117","https://openalex.org/W2073681303","https://openalex.org/W2051487156"],"abstract_inverted_index":{"Network":[0],"pruning":[1,77,122],"can":[2,158],"reduce":[3],"the":[4,58,68,82,89,101,112,124,133,139,144,149,153,177,183,186,197],"computation":[5],"cost":[6],"of":[7,190],"deep":[8],"neural":[9],"network":[10],"(DNN)":[11],"models.":[12,180],"However,":[13],"sparse":[14,29,63,154,178],"models":[15,30],"often":[16],"produce":[17],"randomly-distributed":[18],"weights":[19],"to":[20,24,127,169],"maintain":[21,128],"accuracy,":[22],"leading":[23],"irregular":[25,121],"computations.":[26,42],"Consequently,":[27],"unstructured":[28,203],"cannot":[31],"achieve":[32],"meaningful":[33],"speedup":[34],"on":[35,185],"commodity":[36],"hardware":[37],"built":[38],"for":[39,53,96,115,120],"dense":[40,198],"matrix":[41,85,91],"Accelerators":[43],"are":[44],"usually":[45],"modified":[46],"or":[47],"designed":[48],"with":[49],"structured":[50,108],"sparsity-optimized":[51],"architectures":[52],"exploiting":[54],"sparsity.":[55,204],"For":[56],"example,":[57],"Ampere":[59],"architecture":[60],"introduces":[61],"a":[62,76,107,164],"tensor":[64,155],"core,":[65],"which":[66,105],"adopts":[67],"2:4":[69,145],"sparsity":[70,103,109,135,146,167,173],"pattern.":[71],"We":[74],"propose":[75],"method":[78],"that":[79,84],"builds":[80],"upon":[81],"insight":[83],"multiplication":[86],"generally":[87],"breaks":[88],"large":[90],"into":[92,163],"multiple":[93],"smaller":[94],"tiles":[95],"parallel":[97],"execution.":[98],"We":[99,157,181],"present":[100],"\"tile-wise\"":[102],"pattern,":[104],"maintains":[106],"pattern":[110,168],"at":[111,123,138,148],"tile":[113],"level":[114,151],"efficient":[116],"execution":[117],"but":[118],"allows":[119],"global":[125,140],"scale":[126],"high":[129],"accuracy.":[130],"In":[131],"addition,":[132],"tile-wise":[134],"is":[136],"implemented":[137],"memory":[141],"level,":[142],"and":[143,174,193,202],"executes":[147],"register":[150],"inside":[152],"core.":[156],"combine":[159],"these":[160],"two":[161],"patterns":[162],"\"tile-vector-wise\"":[165],"(TVW)":[166],"explore":[170],"more":[171],"fine-grained":[172],"further":[175],"accelerate":[176],"DNN":[179],"evaluate":[182],"TVW":[184],"GPU,":[187],"achieving":[188],"averages":[189],"1:85\u00d7,":[191],"2:75\u00d7,":[192],"22:18\u00d7":[194],"speedups":[195],"over":[196],"model,":[199],"block":[200],"sparsity,":[201]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4391827186","counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2024-12-08T08:35:33.777357","created_date":"2024-02-15"}