{"id":"https://openalex.org/W3180683307","doi":"https://doi.org/10.1145/3457886","title":"Specializing FGPU for Persistent Deep Learning","display_name":"Specializing FGPU for Persistent Deep Learning","publication_year":2021,"publication_date":"2021-06-30","ids":{"openalex":"https://openalex.org/W3180683307","doi":"https://doi.org/10.1145/3457886","mag":"3180683307"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3457886","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3457886","source":{"id":"https://openalex.org/S112809824","display_name":"ACM Transactions on Reconfigurable Technology and Systems","issn_l":"1936-7406","issn":["1936-7406","1936-7414"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3457886","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084165072","display_name":"Rui Ma","orcid":"https://orcid.org/0000-0001-9611-5870"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rui Ma","raw_affiliation_strings":["The University of Texas at Austin, USA"],"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037217064","display_name":"Jia-Ching Hsu","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jia-Ching Hsu","raw_affiliation_strings":["The University of Texas at Austin, USA"],"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003764368","display_name":"Tian Tan","orcid":"https://orcid.org/0000-0001-5799-8199"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tian Tan","raw_affiliation_strings":["The University of Texas at Austin, USA"],"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084078152","display_name":"Eriko Nurvitadhi","orcid":"https://orcid.org/0000-0002-2347-9590"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eriko Nurvitadhi","raw_affiliation_strings":["Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063801377","display_name":"David Sheffield","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Sheffield","raw_affiliation_strings":["Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079835130","display_name":"Rob Pelt","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rob Pelt","raw_affiliation_strings":["Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082265695","display_name":"Martin Langhammer","orcid":"https://orcid.org/0000-0001-8206-2077"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Martin Langhammer","raw_affiliation_strings":["Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004251886","display_name":"Jaewoong Sim","orcid":"https://orcid.org/0000-0002-0403-9928"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jaewoong Sim","raw_affiliation_strings":["Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111931430","display_name":"Aravind Dasu","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aravind Dasu","raw_affiliation_strings":["Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030464643","display_name":"Derek Chiou","orcid":"https://orcid.org/0009-0008-6762-4527"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Derek Chiou","raw_affiliation_strings":["Microsoft and The University of Texas at Austin, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft and The University of Texas at Austin, USA","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I86519309"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.341,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.341687,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":76,"max":79},"biblio":{"volume":"14","issue":"2","first_page":"1","last_page":"23"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9965,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9965,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9963,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9928,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stratix","display_name":"Stratix","score":0.6346855},{"id":"https://openalex.org/keywords/generality","display_name":"Generality","score":0.4787378}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9024222},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.7298211},{"id":"https://openalex.org/C2776277307","wikidata":"https://www.wikidata.org/wiki/Q22074755","display_name":"Stratix","level":3,"score":0.6346855},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.5749745},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5548582},{"id":"https://openalex.org/C2780767217","wikidata":"https://www.wikidata.org/wiki/Q5532421","display_name":"Generality","level":2,"score":0.4787378},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.46474358},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.46031922},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.14892516},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C542102704","wikidata":"https://www.wikidata.org/wiki/Q183257","display_name":"Psychotherapist","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3457886","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3457886","source":{"id":"https://openalex.org/S112809824","display_name":"ACM Transactions on Reconfigurable Technology and Systems","issn_l":"1936-7406","issn":["1936-7406","1936-7414"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3457886","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3457886","source":{"id":"https://openalex.org/S112809824","display_name":"ACM Transactions on Reconfigurable Technology and Systems","issn_l":"1936-7406","issn":["1936-7406","1936-7414"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.5,"display_name":"Industry, innovation and infrastructure"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":17,"referenced_works":["https://openalex.org/W1523401229","https://openalex.org/W2037748535","https://openalex.org/W2048166678","https://openalex.org/W2097959379","https://openalex.org/W2182879957","https://openalex.org/W2585720638","https://openalex.org/W2588448445","https://openalex.org/W2589086007","https://openalex.org/W2727238169","https://openalex.org/W2798956872","https://openalex.org/W2934853022","https://openalex.org/W2963608065","https://openalex.org/W2983713406","https://openalex.org/W2996795721","https://openalex.org/W3034833480","https://openalex.org/W3040903763","https://openalex.org/W4212902747"],"related_works":["https://openalex.org/W4254372399","https://openalex.org/W3208151864","https://openalex.org/W2518118925","https://openalex.org/W2386041993","https://openalex.org/W2160474882","https://openalex.org/W2150008128","https://openalex.org/W1967938402","https://openalex.org/W1608572506","https://openalex.org/W1564576805","https://openalex.org/W1509155667"],"abstract_inverted_index":{"Overlay":[0],"architectures":[1,37],"are":[2],"a":[3,124],"good":[4],"way":[5],"to":[6,22,98,144,185],"enable":[7],"fast":[8],"development":[9],"and":[10,41,51,75,110,119,141,155,189],"debug":[11],"on":[12,123,174],"FPGAs":[13],"at":[14],"the":[15,73,87,116,120,179,198],"expense":[16],"of":[17,45,77],"potentially":[18],"limited":[19],"performance":[20,83,163],"compared":[21],"fully":[23],"customized":[24],"FPGA":[25,33,46,131],"designs.":[26],"When":[27],"used":[28],"in":[29,132],"concert":[30],"with":[31,108,169],"hand-tuned":[32],"solutions,":[34],"performant":[35],"overlay":[36,59,181],"can":[38],"improve":[39],"time-to-solution":[40],"thus":[42],"overall":[43],"productivity":[44],"solutions.":[47],"This":[48],"work":[49],"tunes":[50],"specializes":[52],"FGPU,":[53],"an":[54,95,170],"open":[55],"source":[56],"OpenCL-programmable":[57],"GPU":[58,78],"for":[60,86,100,166],"FPGAs.":[61],"We":[62,92,113,177],"demonstrate":[63,145,190],"that":[64,191],"our":[65],"persistent":[66,88,135],"deep":[67,89],"learning":[68,90],"(PDL":[69],")-FGPU":[70],"architecture":[71],"maintains":[72],"ease-of-programming":[74],"generality":[76],"programming":[79],"while":[80],"achieving":[81],"high":[82],"from":[84],"specialization":[85],"domain.":[91],"also":[93],"propose":[94],"easy":[96],"method":[97],"specialize":[99],"other":[101],"domains.":[102],"PDL-FGPU":[103,122,147,180,192],"includes":[104],"new":[105],"instructions,":[106],"along":[107],"micro-architecture":[109],"compiler":[111],"enhancements.":[112],"evaluate":[114],"both":[115],"FGPU":[117],"baseline":[118],"proposed":[121],"modern":[125],"high-end":[126],"Intel":[127,183],"Stratix":[128],"10":[129],"2800":[130],"simulation":[133],"running":[134],"DL":[136],"applications":[137,143,168],"(RNN,":[138],"GRU,":[139],"LSTM),":[140],"non-DL":[142],"generality.":[146],"requires":[148],"1.4\u20133\u00d7":[149],"more":[150,153,157],"ALMs,":[151],"4.4\u20136.4\u00d7":[152],"M20ks,":[154],"1\u20139.5\u00d7":[156],"DSPs":[158],"than":[159,197],"baseline,":[160],"but":[161],"improves":[162],"by":[164],"56\u2013693\u00d7":[165],"PDL":[167],"average":[171],"23.1%":[172],"degradation":[173],"non-PDL":[175],"applications.":[176],"integrated":[178],"into":[182],"OPAE":[184],"measure":[186],"real-world":[187],"performance/power":[188],"is":[193],"only":[194],"4.0\u201310.4\u00d7":[195],"slower":[196],"Nvidia":[199],"V100.":[200]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3180683307","counts_by_year":[{"year":2022,"cited_by_count":4}],"updated_date":"2024-12-06T01:59:49.686097","created_date":"2021-07-19"}