{"id":"https://openalex.org/W4225293733","doi":"https://doi.org/10.1109/irps48227.2022.9764554","title":"Reliability, Availability, and Serviceability Challenges for Heterogeneous System Design","display_name":"Reliability, Availability, and Serviceability Challenges for Heterogeneous System Design","publication_year":2022,"publication_date":"2022-03-01","ids":{"openalex":"https://openalex.org/W4225293733","doi":"https://doi.org/10.1109/irps48227.2022.9764554"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/irps48227.2022.9764554","pdf_url":null,"source":{"id":"https://openalex.org/S4363605693","display_name":"2022 IEEE International Reliability Physics Symposium (IRPS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087590295","display_name":"Majed Valad Beigi","orcid":"https://orcid.org/0009-0004-0662-3214"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Majed Valad Beigi","raw_affiliation_strings":["RAS Architecture AMD, Inc., Boxborough, MA"],"affiliations":[{"raw_affiliation_string":"RAS Architecture AMD, Inc., Boxborough, MA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078748445","display_name":"Sudhanva Gurumurthi","orcid":"https://orcid.org/0000-0002-1740-7304"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sudhanva Gurumurthi","raw_affiliation_strings":["RAS Architecture AMD, Inc., Austin, TX"],"affiliations":[{"raw_affiliation_string":"RAS Architecture AMD, Inc., Austin, TX","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061044305","display_name":"Vilas Sridharan","orcid":"https://orcid.org/0000-0002-2944-2799"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vilas Sridharan","raw_affiliation_strings":["RAS Architecture AMD, Inc., Boxborough, MA"],"affiliations":[{"raw_affiliation_string":"RAS Architecture AMD, Inc., Boxborough, MA","institution_ids":[]}]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.782,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.79455,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":83,"max":85},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11005","display_name":"Radiation Effects in Electronics","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11005","display_name":"Radiation Effects in Electronics","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9981,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9974,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/serviceability","display_name":"Serviceability (structure)","score":0.9170634}],"concepts":[{"id":"https://openalex.org/C110245778","wikidata":"https://www.wikidata.org/wiki/Q2169658","display_name":"Serviceability (structure)","level":2,"score":0.9170634},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7644719},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.63211316},{"id":"https://openalex.org/C188087704","wikidata":"https://www.wikidata.org/wiki/Q369577","display_name":"Standardization","level":2,"score":0.5909074},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5773055},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.47645834},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.45218888},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.4363455},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.43049452},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.4205287},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.35557887},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.15254131},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.13380608},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/irps48227.2022.9764554","pdf_url":null,"source":{"id":"https://openalex.org/S4363605693","display_name":"2022 IEEE International Reliability Physics Symposium (IRPS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.42,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":13,"referenced_works":["https://openalex.org/W1558516248","https://openalex.org/W1559781097","https://openalex.org/W1967732260","https://openalex.org/W1978082708","https://openalex.org/W2053608561","https://openalex.org/W2138815251","https://openalex.org/W2145071552","https://openalex.org/W2751410428","https://openalex.org/W2782944865","https://openalex.org/W3158614068","https://openalex.org/W3204625459","https://openalex.org/W4248895726","https://openalex.org/W4249144718"],"related_works":["https://openalex.org/W75531337","https://openalex.org/W4234058849","https://openalex.org/W328308450","https://openalex.org/W3151946113","https://openalex.org/W2942738261","https://openalex.org/W282641168","https://openalex.org/W2378767206","https://openalex.org/W2370611442","https://openalex.org/W2097451945","https://openalex.org/W1540871478"],"abstract_inverted_index":{"The":[0,104],"demand":[1],"for":[2,61,146,154],"high-performance":[3,16,67],"computation":[4],"continues":[5],"to":[6,77,148],"accelerate.":[7],"To":[8],"satisfy":[9],"this":[10],"increasing":[11,37],"demand,":[12],"modern":[13],"server":[14,65],"and":[15,26,40,57,66,102,115,133],"computing":[17,68],"systems":[18,114],"are":[19,52],"increasingly":[20],"deploying":[21,82],"nodes":[22,62,83],"with":[23],"greater":[24],"heterogeneity":[25,35,101],"integration.":[27,103],"Both":[28],"trends":[29],"reduce":[30],"the":[31,53,94,121,130,141],"cost":[32,44],"of":[33,73,120],"computation:":[34],"by":[36,42,99],"absolute":[38],"performance,":[39],"integration":[41],"reducing":[43],"per":[45],"computation.":[46],"What":[47],"has":[48],"not":[49],"changed,":[50],"however,":[51],"stringent":[54],"reliability,":[55],"availability,":[56],"serviceability":[58],"(RAS)":[59],"requirements":[60],"used":[63],"in":[64,150],"systems.":[69],"A":[70],"high":[71],"level":[72],"RAS":[74,96,122],"is":[75],"required":[76],"ensure":[78],"that":[79],"data":[80,110,156],"centers":[81],"can":[84],"correctly":[85],"perform":[86],"computations":[87],"over":[88],"their":[89],"expected":[90],"lifetime.This":[91],"paper":[92,105,142],"describes":[93],"increased":[95,100],"challenges":[97,108],"posed":[98],"motivates":[106],"these":[107,152],"using":[109],"gathered":[111],"from":[112],"production":[113],"presents":[116],"a":[117,125,144],"case":[118],"study":[119],"implementation":[123],"on":[124],"highly":[126],"integrated,":[127],"heterogeneous":[128,137],"node:":[129],"AMD":[131,134],"EPYC\u2122":[132],"Instinct\u2122":[135],"MI250X":[136],"compute":[138],"node.":[139],"Finally,":[140],"provides":[143],"call":[145],"standardization":[147],"aid":[149],"meeting":[151],"goals":[153],"future":[155],"centers.":[157]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4225293733","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3}],"updated_date":"2025-01-08T07:15:29.496024","created_date":"2022-05-05"}