{"id":"https://openalex.org/W4399117317","doi":"https://doi.org/10.1145/3634769.3634807","title":"WattWiser: Power & Resource-Efficient Scheduling for Multi-Model Multi-GPU Inference Servers","display_name":"WattWiser: Power & Resource-Efficient Scheduling for Multi-Model Multi-GPU Inference Servers","publication_year":2023,"publication_date":"2023-10-28","ids":{"openalex":"https://openalex.org/W4399117317","doi":"https://doi.org/10.1145/3634769.3634807"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3634769.3634807","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3634769.3634807","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3634769.3634807","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004077070","display_name":"Ali Jahanshahi","orcid":"https://orcid.org/0000-0002-4301-7588"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ali Jahanshahi","raw_affiliation_strings":["Department of Computer Science & Engineering, University of California, Riverside, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science & Engineering, University of California, Riverside, USA","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017663084","display_name":"Mohammadreza Rezvani","orcid":"https://orcid.org/0009-0009-2822-777X"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mohammadreza Rezvani","raw_affiliation_strings":["Department of Computer Science & Engineering, University of California, Riverside, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science & Engineering, University of California, Riverside, USA","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000712719","display_name":"Daniel Wong","orcid":"https://orcid.org/0000-0002-5376-7868"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Wong","raw_affiliation_strings":["Department of Electrical & Computer Engineering, University of California, Riverside, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical & Computer Engineering, University of California, Riverside, USA","institution_ids":["https://openalex.org/I103635307"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.438,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.633114,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":67,"max":78},"biblio":{"volume":null,"issue":null,"first_page":"39","last_page":"44"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9987,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/processor-scheduling","display_name":"Processor scheduling","score":0.44982356}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.83852965},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.78105104},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6362884},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.58064115},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.49221745},{"id":"https://openalex.org/C2984822820","wikidata":"https://www.wikidata.org/wiki/Q1123036","display_name":"Processor scheduling","level":3,"score":0.44982356},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.44457585},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.3188649},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.24852121},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.22163862},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1236105},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.0796715},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3634769.3634807","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3634769.3634807","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3634769.3634807","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3634769.3634807","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.43,"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":3,"referenced_works":["https://openalex.org/W3084790829","https://openalex.org/W3097411828","https://openalex.org/W4293025059"],"related_works":["https://openalex.org/W3140149227","https://openalex.org/W3023290810","https://openalex.org/W2434525066","https://openalex.org/W2130555437","https://openalex.org/W2128410848","https://openalex.org/W2126232624","https://openalex.org/W2118368532","https://openalex.org/W2102390841","https://openalex.org/W2101285930","https://openalex.org/W1809394610"],"abstract_inverted_index":{"With":[0],"the":[1,59,76],"increasing":[2],"integration":[3],"of":[4,81],"Machine":[5,15],"Learning":[6,16],"(ML)":[7],"applications":[8],"into":[9],"cloud":[10,25],"services,":[11],"providing":[12],"high":[13],"throughput":[14],"inference":[17,29,50],"serving":[18],"has":[19],"become":[20],"a":[21,42,79],"major":[22],"demand":[23],"for":[24,37],"service":[26],"providers.":[27],"The":[28],"requests":[30],"need":[31],"to":[32,40,57,74,78,89],"respond":[33],"with":[34,54],"bounded":[35],"latency":[36],"each":[38],"request":[39],"maintain":[41],"consistent":[43],"Service-Level":[44],"Objective":[45],"(SLO).":[46],"To":[47,68],"ensure":[48],"SLO,":[49],"servers":[51],"are":[52,65],"equipped":[53],"multiple":[55],"GPUs":[56],"satisfy":[58],"computational":[60],"requirements.":[61],"However,":[62],"multi-GPU":[63],"systems":[64],"extremely":[66],"power-hungry.":[67],"resolve":[69],"this,":[70],"it":[71],"is":[72],"ideal":[73],"consolidate":[75],"load":[77],"sub-set":[80],"GPUs,":[82,86],"and":[83],"potentially":[84],"share":[85],"in":[87],"order":[88],"minimize":[90],"power":[91],"consumption,":[92],"without":[93],"violating":[94],"SLO.":[95]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4399117317","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2024-12-31T05:33:27.475352","created_date":"2024-05-30"}