{"id":"https://openalex.org/W2944824859","doi":"https://doi.org/10.1145/3316781.3317875","title":"Overcoming Data Transfer Bottlenecks in FPGA-based DNN Accelerators via Layer Conscious Memory Management","display_name":"Overcoming Data Transfer Bottlenecks in FPGA-based DNN Accelerators via Layer Conscious Memory Management","publication_year":2019,"publication_date":"2019-05-23","ids":{"openalex":"https://openalex.org/W2944824859","doi":"https://doi.org/10.1145/3316781.3317875","mag":"2944824859"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3316781.3317875","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102904614","display_name":"Xuechao Wei","orcid":"https://orcid.org/0000-0002-0996-2260"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuechao Wei","raw_affiliation_strings":["Center for Energy-efficient Computing and Applications, School of EECS, Peking University, China and Falcon Computing Solutions, Inc., Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"Center for Energy-efficient Computing and Applications, School of EECS, Peking University, China and Falcon Computing Solutions, Inc., Los Angeles, CA, USA","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100604860","display_name":"Yun Liang","orcid":"https://orcid.org/0000-0002-9076-7998"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yun Liang","raw_affiliation_strings":["Center for Energy-efficient Computing and Applications, School of EECS, Peking University, China"],"affiliations":[{"raw_affiliation_string":"Center for Energy-efficient Computing and Applications, School of EECS, Peking University, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016776689","display_name":"Jason Cong","orcid":"https://orcid.org/0000-0003-2887-6963"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Jason Cong","raw_affiliation_strings":["Center for Energy-efficient Computing and Applications, School of EECS, Peking University, China and Computer Science Department, University of California, Los Angeles, CA, USA and Falcon Computing Solutions, Inc., Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"Center for Energy-efficient Computing and Applications, School of EECS, Peking University, China and Computer Science Department, University of California, Los Angeles, CA, USA and Falcon Computing Solutions, Inc., Los Angeles, CA, USA","institution_ids":["https://openalex.org/I20231570","https://openalex.org/I161318765"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":12.61,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":47,"citation_normalized_percentile":{"value":0.999954,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10502","display_name":"Memristive Devices for Neuromorphic Computing","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10502","display_name":"Memristive Devices for Neuromorphic Computing","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Deep Learning in Computer Vision and Image Recognition","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Performance Optimization","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/multicore-architectures","display_name":"Multicore Architectures","score":0.533554},{"id":"https://openalex.org/keywords/performance-optimization","display_name":"Performance Optimization","score":0.528454},{"id":"https://openalex.org/keywords/gpu-computing","display_name":"GPU Computing","score":0.521751},{"id":"https://openalex.org/keywords/memory-systems","display_name":"Memory Systems","score":0.521393},{"id":"https://openalex.org/keywords/non-volatile-memory","display_name":"Non-Volatile Memory","score":0.520365},{"id":"https://openalex.org/keywords/distributed-shared-memory","display_name":"Distributed shared memory","score":0.42248276}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8308358},{"id":"https://openalex.org/C63511323","wikidata":"https://www.wikidata.org/wiki/Q908936","display_name":"Interleaved memory","level":4,"score":0.5284053},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.524721},{"id":"https://openalex.org/C51290061","wikidata":"https://www.wikidata.org/wiki/Q1936765","display_name":"Uniform memory access","level":4,"score":0.52039015},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5101388},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.46739516},{"id":"https://openalex.org/C93446704","wikidata":"https://www.wikidata.org/wiki/Q449328","display_name":"Registered memory","level":3,"score":0.43849966},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.42943847},{"id":"https://openalex.org/C39528615","wikidata":"https://www.wikidata.org/wiki/Q1229610","display_name":"Distributed shared memory","level":5,"score":0.42248276},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.38260224},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.38249323},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.33721805},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2977215},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3316781.3317875","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":28,"referenced_works":["https://openalex.org/W2002555321","https://openalex.org/W2094756095","https://openalex.org/W2094917740","https://openalex.org/W2097117768","https://openalex.org/W2194775991","https://openalex.org/W2274287116","https://openalex.org/W2294282016","https://openalex.org/W2489529491","https://openalex.org/W2565125333","https://openalex.org/W2565305208","https://openalex.org/W2574797063","https://openalex.org/W2584311934","https://openalex.org/W2585560244","https://openalex.org/W2585774018","https://openalex.org/W2625954420","https://openalex.org/W2627042741","https://openalex.org/W2727238169","https://openalex.org/W2729080111","https://openalex.org/W2796625795","https://openalex.org/W2808917878","https://openalex.org/W2899915146","https://openalex.org/W2900082550","https://openalex.org/W2917087921","https://openalex.org/W2963446712","https://openalex.org/W2964350391","https://openalex.org/W3103894541","https://openalex.org/W4249932213","https://openalex.org/W4301361180"],"related_works":["https://openalex.org/W4321458411","https://openalex.org/W3048967625","https://openalex.org/W3021597805","https://openalex.org/W2782503170","https://openalex.org/W2354036839","https://openalex.org/W2155373950","https://openalex.org/W2136295296","https://openalex.org/W2044064773","https://openalex.org/W1575240748","https://openalex.org/W120214571"],"abstract_inverted_index":{"Deep":[0],"Neural":[1],"Networks":[2],"(DNNs)":[3],"are":[4,30,157],"becoming":[5],"more":[6,8],"and":[7,24,80,104,155],"complex":[9],"than":[10],"before.":[11],"Previous":[12],"hardware":[13,72],"accelerator":[14],"designs":[15],"neglect":[16],"the":[17,33,43,77,81,91,96,99,106,134],"layer":[18,64,78],"diversity":[19,79],"in":[20],"terms":[21],"of":[22,46,85,98,109,113],"computation":[23],"communication":[25],"behavior.":[26],"On-chip":[27],"memory":[28,34,55,66,86,93,103,126,135,150],"resources":[29],"underutilized":[31],"for":[32,69,133],"bounded":[35,101],"layers,":[36],"leading":[37],"to":[38,52,88,94,129,147,159],"suboptimal":[39],"performance.":[40],"In":[41,138],"addition,":[42,139],"increasing":[44],"complexity":[45],"DNN":[47,71],"structures":[48],"makes":[49],"it":[50],"difficult":[51],"do":[53],"on-chip":[54,92,131,149],"allocation.":[56],"To":[57],"address":[58],"these":[59],"issues,":[60],"we":[61],"propose":[62],"a":[63,125],"conscious":[65],"management":[67],"framework":[68,75],"FPGA-based":[70],"accelerators.":[73],"Our":[74],"exploits":[76],"disjoint":[82],"lifespan":[83],"information":[84],"buffers":[87,132],"efficiently":[89],"utilize":[90],"improve":[95,148],"performance":[97,108,171],"layers":[100,144],"by":[102],"thus":[105],"entire":[107],"DNNs.":[110],"It":[111],"consists":[112],"four":[114],"key":[115],"techniques":[116,167],"working":[117],"coordinately":[118],"with":[119,174],"each":[120],"other.":[121],"We":[122],"first":[123],"devise":[124],"allocation":[127],"algorithm":[128],"allocate":[130],"bound":[136],"layers.":[137],"buffer":[140,153],"sharing":[141],"between":[142],"different":[143],"is":[145],"applied":[146],"utilization.":[151],"Finally,":[152],"prefetching":[154],"splitting":[156],"used":[158],"further":[160],"reduce":[161],"latency.":[162],"Experiments":[163],"show":[164],"that":[165],"our":[166],"can":[168],"achieve":[169],"1.36X":[170],"improvement":[172],"compared":[173],"previous":[175],"designs.":[176]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2944824859","counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":11},{"year":2021,"cited_by_count":18},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":1}],"updated_date":"2024-11-29T05:40:09.135944","created_date":"2019-05-29"}