{"id":"https://openalex.org/W4313886488","doi":"https://doi.org/10.1109/cloudcom55334.2022.00033","title":"GPU Performance Tuning and Power Efficiency on the DGX A100 Cluster","display_name":"GPU Performance Tuning and Power Efficiency on the DGX A100 Cluster","publication_year":2022,"publication_date":"2022-12-01","ids":{"openalex":"https://openalex.org/W4313886488","doi":"https://doi.org/10.1109/cloudcom55334.2022.00033"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/cloudcom55334.2022.00033","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007655321","display_name":"Khanin Udomchoksakul","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Khanin Udomchoksakul","raw_affiliation_strings":["Electrical and Computer Engineering, Carnegie Mellon University, Pittsburgh, USA"],"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, Carnegie Mellon University, Pittsburgh, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076498926","display_name":"Orathai Sangpetch","orcid":"https://orcid.org/0000-0001-8417-825X"},"institutions":[],"countries":["TH"],"is_corresponding":false,"raw_author_name":"Orathai Sangpetch","raw_affiliation_strings":["Carnegie Mellon-KMITL Thailand Program, CMKL University, Bangkok, Thailand"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon-KMITL Thailand Program, CMKL University, Bangkok, Thailand","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074244000","display_name":"Akkarit Sangpetch","orcid":"https://orcid.org/0000-0003-4698-9160"},"institutions":[],"countries":["TH"],"is_corresponding":false,"raw_author_name":"Akkarit Sangpetch","raw_affiliation_strings":["Carnegie Mellon-KMITL Thailand Program, CMKL University, Bangkok, Thailand"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon-KMITL Thailand Program, CMKL University, Bangkok, Thailand","institution_ids":[]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":60},"biblio":{"volume":null,"issue":null,"first_page":"170","last_page":"177"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9981,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9981,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9977,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9972,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.58848476},{"id":"https://openalex.org/keywords/milestone","display_name":"Milestone","score":0.46664336}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7832253},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.6536403},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.5885095},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.58848476},{"id":"https://openalex.org/C120060458","wikidata":"https://www.wikidata.org/wiki/Q10145","display_name":"Milestone","level":2,"score":0.46664336},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.45708063},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.295794},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.25584358},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.20202947},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/cloudcom55334.2022.00033","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.52,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":3,"referenced_works":["https://openalex.org/W2966274836","https://openalex.org/W3118922386","https://openalex.org/W3206396201"],"related_works":["https://openalex.org/W4387878404","https://openalex.org/W3140574787","https://openalex.org/W3133583653","https://openalex.org/W2990251955","https://openalex.org/W2955244041","https://openalex.org/W2584040191","https://openalex.org/W2561695978","https://openalex.org/W2490667451","https://openalex.org/W2064167013","https://openalex.org/W1950486549"],"abstract_inverted_index":{"The":[0],"complexity":[1],"of":[2,179,187],"current":[3,209],"Deep":[4,169],"learning":[5,170],"has":[6,36,69],"been":[7,71],"growing":[8],"rapidly":[9],"nowadays.":[10],"Such":[11],"advancement":[12],"allows":[13],"various":[14],"organizations":[15],"such":[16,78,94],"as":[17,79,214],"private":[18],"sectors":[19],"and":[20,43,163],"government":[21],"to":[22,38,45,103,110,113,132,160,168,177,191,207],"leverage":[23],"intelligent":[24],"systems":[25],"on":[26,62,65,74,87,172],"their":[27],"use":[28],"cases.":[29],"High":[30],"Performance":[31],"Computing":[32],"(HPC)":[33],"infrastructure":[34],"nowadays":[35],"pivoted":[37],"GPU-oriented":[39],"systems,":[40],"enabling":[41],"developers":[42],"researchers":[44],"train":[46],"complex":[47],"models":[48],"with":[49,57],"large":[50],"datasets":[51],"unlike":[52],"conventional":[53],"clusters":[54],"equipped":[55],"only":[56,185],"CPU":[58],"cores.":[59],"However,":[60],"focus":[61],"power":[63,140,181],"efficiency":[64],"the":[66,75,106,114,118,125,134,148,173,180,196,208,215],"HPC":[67,95,115],"system":[68,77,210],"not":[70,84],"prevalent":[72],"especially":[73],"new":[76],"DGX":[80],"A100":[81,154],"that":[82,157,201],"does":[83],"have":[85],"datapoints":[86],"how":[88],"GPUs":[89],"consumed":[90],"power.":[91],"Even":[92],"though":[93],"cluster":[96,135],"can":[97,143],"be":[98,145,203],"powerful,":[99],"always":[100],"allowing":[101],"it":[102,127],"run":[104],"at":[105,117,211],"maximum":[107],"capacity":[108],"results":[109],"financial":[111],"cost":[112],"provider":[116],"end.":[119],"Therefore,":[120],"for":[121,130],"any":[122],"organization":[123],"providing":[124],"system,":[126],"is":[128],"crucial":[129],"them":[131],"balance":[133],"capabilities":[136],"while":[137,183],"maintaining":[138],"overall":[139],"consumption":[141],"which":[142],"potentially":[144],"costly":[146],"in":[147],"long":[149],"term.":[150],"This":[151],"paper":[152,197],"reveals":[153],"GPU":[155,165],"metrics":[156],"are":[158],"relevant":[159],"Power":[161],"usage":[162,182],"explains":[164],"profiling":[166],"applied":[167],"workload":[171],"cluster,":[174],"saving":[175],"up":[176],"32%":[178],"compromising":[184],"11.5%":[186],"training":[188],"time":[189],"compared":[190],"a":[192],"default":[193],"profile.":[194],"Then,":[195],"investigates":[198],"literature":[199],"review":[200],"could":[202],"learned":[204],"further":[205],"adopted":[206],"CMKL":[212],"university":[213],"next":[216],"milestone.":[217]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4313886488","counts_by_year":[],"updated_date":"2024-12-12T22:47:59.994809","created_date":"2023-01-10"}