{"id":"https://openalex.org/W2899396210","doi":"https://doi.org/10.1145/3280851","title":"Exposing Memory Access Patterns to Improve Instruction and Memory Efficiency in GPUs","display_name":"Exposing Memory Access Patterns to Improve Instruction and Memory Efficiency in GPUs","publication_year":2018,"publication_date":"2018-10-29","ids":{"openalex":"https://openalex.org/W2899396210","doi":"https://doi.org/10.1145/3280851","mag":"2899396210"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3280851","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3280851","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3280851","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060189926","display_name":"Neal Crago","orcid":"https://orcid.org/0000-0001-7774-0531"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Neal C. Crago","raw_affiliation_strings":["NVIDIA, Santa Clara, CA"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, CA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015391591","display_name":"Mark W. Stephenson","orcid":"https://orcid.org/0000-0002-1350-0165"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mark Stephenson","raw_affiliation_strings":["NVIDIA, Santa Clara, CA"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, CA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112796474","display_name":"Stephen W. Keckler","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stephen W. Keckler","raw_affiliation_strings":["NVIDIA, Santa Clara, CA"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, CA","institution_ids":["https://openalex.org/I4210127875"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.268,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":2,"citation_normalized_percentile":{"value":0.602335,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":70,"max":74},"biblio":{"volume":"15","issue":"4","first_page":"1","last_page":"23"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9983,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9955,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9158783},{"id":"https://openalex.org/C150202949","wikidata":"https://www.wikidata.org/wiki/Q107602","display_name":"Pointer (user interface)","level":2,"score":0.5503711},{"id":"https://openalex.org/C63511323","wikidata":"https://www.wikidata.org/wiki/Q908936","display_name":"Interleaved memory","level":4,"score":0.5396656},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5191623},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.479408},{"id":"https://openalex.org/C41036726","wikidata":"https://www.wikidata.org/wiki/Q844824","display_name":"Physical address","level":3,"score":0.4371964},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.42874393},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.38978946},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3411134},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3286401},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.32437626},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.29055333},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3280851","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3280851","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3280851","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3280851","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.9,"id":"https://metadata.un.org/sdg/7"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":45,"referenced_works":["https://openalex.org/W1494930385","https://openalex.org/W1988061915","https://openalex.org/W2009711341","https://openalex.org/W2026186397","https://openalex.org/W2029618796","https://openalex.org/W2047060659","https://openalex.org/W2080592089","https://openalex.org/W2081583983","https://openalex.org/W2093043622","https://openalex.org/W2095954861","https://openalex.org/W2096765878","https://openalex.org/W2098040113","https://openalex.org/W2098290747","https://openalex.org/W2098505406","https://openalex.org/W2099958604","https://openalex.org/W2103330947","https://openalex.org/W2107333973","https://openalex.org/W2111394443","https://openalex.org/W2115172404","https://openalex.org/W2118826546","https://openalex.org/W2125305952","https://openalex.org/W2129232868","https://openalex.org/W2134633067","https://openalex.org/W2141280299","https://openalex.org/W2142444503","https://openalex.org/W2144481293","https://openalex.org/W2148041475","https://openalex.org/W2152956697","https://openalex.org/W2153185479","https://openalex.org/W2162838417","https://openalex.org/W2164215436","https://openalex.org/W2166918318","https://openalex.org/W2170382128","https://openalex.org/W2171641226","https://openalex.org/W2234355962","https://openalex.org/W2273440736","https://openalex.org/W2613066110","https://openalex.org/W3030752614","https://openalex.org/W3138340923","https://openalex.org/W3152438252","https://openalex.org/W4236382111","https://openalex.org/W4236468694","https://openalex.org/W4239813889","https://openalex.org/W4250511340","https://openalex.org/W4252742548"],"related_works":["https://openalex.org/W4378806055","https://openalex.org/W4312264564","https://openalex.org/W2999459628","https://openalex.org/W2954208483","https://openalex.org/W2526783553","https://openalex.org/W2386349366","https://openalex.org/W2199439667","https://openalex.org/W2043352873","https://openalex.org/W1554378476","https://openalex.org/W1487697053"],"abstract_inverted_index":{"Modern":[0],"computing":[1],"workloads":[2,20],"often":[3],"have":[4],"high":[5,9,43],"memory":[6,15,52,59,84],"intensity,":[7],"requiring":[8],"bandwidth":[10],"access":[11],"to":[12,78],"memory.":[13],"The":[14,68],"request":[16,60,85],"patterns":[17,61],"of":[18,37,45,100,115,120,128],"these":[19],"vary":[21],"and":[22,27,41,57,62,81,122],"include":[23],"regular":[24],"strided":[25,56],"accesses":[26],"indirect":[28,58],"(pointer-based)":[29],"accesses.":[30],"Such":[31],"applications":[32],"require":[33],"a":[34,42,123],"large":[35],"number":[36],"address":[38,72],"generation":[39],"instructions":[40,53,70,74,102],"degree":[44],"memory-level":[46],"parallelism.":[47],"This":[48],"article":[49],"proposes":[50],"new":[51,69],"that":[54,95],"exploit":[55],"improve":[63],"efficiency":[64],"in":[65,110,125],"GPU":[66,105],"architectures.":[67],"reduce":[71,82],"calculation":[73],"by":[75,87],"offloading":[76],"addressing":[77],"dedicated":[79],"hardware,":[80],"destructive":[83],"interference":[86],"grouping":[88],"related":[89],"requests":[90],"together.":[91],"Our":[92],"results":[93],"show":[94],"we":[96],"can":[97],"eliminate":[98],"33%":[99],"dynamic":[101],"across":[103],"16":[104],"benchmarks.":[106],"These":[107],"improvements":[108],"result":[109],"an":[111,117],"overall":[112],"runtime":[113],"improvement":[114],"26%,":[116],"energy":[118],"reduction":[119,124],"18%,":[121],"energy-delay":[126],"product":[127],"32%.":[129]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2899396210","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2024-12-09T12:32:38.633699","created_date":"2018-11-09"}