{"id":"https://openalex.org/W3207820301","doi":"https://doi.org/10.1109/fpl53798.2021.00060","title":"Leveraging Fine-grained Structured Sparsity for CNN Inference on Systolic Array Architectures","display_name":"Leveraging Fine-grained Structured Sparsity for CNN Inference on Systolic Array Architectures","publication_year":2021,"publication_date":"2021-08-01","ids":{"openalex":"https://openalex.org/W3207820301","doi":"https://doi.org/10.1109/fpl53798.2021.00060","mag":"3207820301"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/fpl53798.2021.00060","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078749880","display_name":"Linqiao Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Linqiao Liu","raw_affiliation_strings":["University of Toronto, Toronto, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"University of Toronto, Toronto, Ontario, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102840804","display_name":"Stephen D. Brown","orcid":"https://orcid.org/0009-0009-8329-1504"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Stephen Brown","raw_affiliation_strings":["University of Toronto, Toronto, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"University of Toronto, Toronto, Ontario, Canada","institution_ids":["https://openalex.org/I185261750"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.492,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.522735,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":86,"max":87},"biblio":{"volume":"abs 1608 8710","issue":null,"first_page":"301","last_page":"305"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9967,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9966,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/systolic-array","display_name":"Systolic array","score":0.7743602},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6422697},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.48013207},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.42102706}],"concepts":[{"id":"https://openalex.org/C150741067","wikidata":"https://www.wikidata.org/wiki/Q2377218","display_name":"Systolic array","level":3,"score":0.7743602},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.75201166},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.7393292},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6918624},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6550361},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6422697},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5023279},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.48013207},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.42102706},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.41865212},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3895808},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.33534873},{"id":"https://openalex.org/C14580979","wikidata":"https://www.wikidata.org/wiki/Q876049","display_name":"Very-large-scale integration","level":2,"score":0.24299157},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.20594516},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.16757807},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/fpl53798.2021.00060","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.42,"id":"https://metadata.un.org/sdg/9"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":19,"referenced_works":["https://openalex.org/W1686810756","https://openalex.org/W2108598243","https://openalex.org/W2114766824","https://openalex.org/W2194775991","https://openalex.org/W2585720638","https://openalex.org/W2625954420","https://openalex.org/W2761085955","https://openalex.org/W2796347433","https://openalex.org/W2805003733","https://openalex.org/W2915106038","https://openalex.org/W2946787800","https://openalex.org/W2949619037","https://openalex.org/W2962835968","https://openalex.org/W2962965870","https://openalex.org/W2963813662","https://openalex.org/W2964299589","https://openalex.org/W2970971581","https://openalex.org/W4293584584","https://openalex.org/W4295312788"],"related_works":["https://openalex.org/W98480971","https://openalex.org/W2347854075","https://openalex.org/W2164382479","https://openalex.org/W2157978810","https://openalex.org/W2150291671","https://openalex.org/W2146343568","https://openalex.org/W2115380918","https://openalex.org/W2058965144","https://openalex.org/W2027972911","https://openalex.org/W2013643406"],"abstract_inverted_index":{"The":[0],"high":[1],"computational":[2],"complexity":[3],"of":[4,13,48,59],"convolutional":[5],"neural":[6],"networks":[7],"(CNNs)":[8],"has":[9,67],"motivated":[10],"many":[11],"studies":[12],"accelerating":[14],"CNN":[15,51,104],"inference":[16,105],"on":[17,73,117,126,139],"field-programmable":[18],"gate":[19],"arrays":[20,28],"(FPGAs).":[21],"Among":[22],"these,":[23],"designs":[24],"that":[25,94,107,121],"feature":[26],"systolic":[27,74,102,163],"can":[29,54],"effectively":[30],"leverage":[31],"the":[32,46,57,127,140,145],"parallelism":[33],"in":[34,50,64,70],"CNNs":[35],"while":[36,143],"acheiving":[37],"good":[38],"placement":[39],"and":[40,99,119,124,135,156],"routing":[41],"quality.":[42],"Weight":[43],"sparsity":[44,86,97],"\u2013":[45,53],"presence":[47],"zeros":[49],"weights":[52],"further":[55],"reduce":[56],"number":[58],"necessary":[60],"multiply-accumulate":[61],"(MAC)":[62],"operations":[63],"CNNs,":[65],"but":[66],"yet":[68],"resulted":[69],"performance":[71],"gain":[72],"arrays.":[75],"In":[76],"this":[77,96],"work,":[78],"we":[79],"propose":[80],"a":[81,89,101,161],"novel":[82],"fine-grained":[83],"structured":[84],"weight":[85],"pattern,":[87,98],"showcase":[88],"processing":[90],"element":[91],"(PE)":[92],"design":[93],"leverages":[95],"develop":[100],"array":[103,164],"accelerator":[106,131],"targets":[108],"an":[109],"Intel":[110],"Arria":[111],"10":[112],"GX1150":[113],"FPGA.":[114],"When":[115],"evaluated":[116],"ResNet-50":[118],"VGG-16":[120],"are":[122],"trained":[123],"pruned":[125],"ImageNet":[128],"dataset,":[129],"our":[130],"achieves":[132],"2.26":[133],"TOPs/s":[134],"1.21":[136],"TOPs/s,":[137],"respectively,":[138],"MAC":[141],"operations,":[142],"keeping":[144],"top-l":[146],"accuracy":[147],"degradation":[148],"within":[149],"5%.":[150],"These":[151],"results":[152],"translate":[153],"to":[154,160],"$2.86\\times$":[155],"$1.75\\times$":[157],"speed-up":[158],"compared":[159],"dense":[162],"baseline.":[165]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3207820301","counts_by_year":[{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":4}],"updated_date":"2025-01-09T22:24:25.878097","created_date":"2021-10-25"}