{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T10:53:56Z","timestamp":1725792836242},"publisher-location":"New York, NY, USA","reference-count":11,"publisher":"ACM","funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CNS-1955650 and CNS-2047521"],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,28]]},"DOI":"10.1145\/3634769.3634807","type":"proceedings-article","created":{"date-parts":[[2024,5,29]],"date-time":"2024-05-29T20:13:19Z","timestamp":1717013599000},"page":"39-44","update-policy":"http:\/\/dx.doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["WattWiser: Power & Resource-Efficient Scheduling for Multi-Model Multi-GPU Inference Servers"],"prefix":"10.1145","author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-4301-7588","authenticated-orcid":false,"given":"Ali","family":"Jahanshahi","sequence":"first","affiliation":[{"name":"Department of Computer Science & Engineering, University of California, Riverside, USA"}]},{"ORCID":"http:\/\/orcid.org\/0009-0009-2822-777X","authenticated-orcid":false,"given":"Mohammadreza","family":"Rezvani","sequence":"additional","affiliation":[{"name":"Department of Computer Science & Engineering, University of California, Riverside, USA"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-5376-7868","authenticated-orcid":false,"given":"Daniel","family":"Wong","sequence":"additional","affiliation":[{"name":"Department of Electrical & Computer Engineering, University of California, Riverside, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,5,29]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Multi-model machine learning inference serving with gpu spatial partitioning. arXiv preprint arXiv:2109.01611","author":"Choi Seungbeom","year":"2021","unstructured":"Seungbeom Choi, Sunho Lee, Yeonjae Kim, Jongse Park, Youngjin Kwon, and Jaehyuk Huh. 2021. Multi-model machine learning inference serving with gpu spatial partitioning. arXiv preprint arXiv:2109.01611 (2021)."},{"key":"e_1_3_2_1_2_1","volume-title":"Serving Heterogeneous Machine Learning Models on Multi-GPU Servers with Spatio-Temporal Sharing. In 2022 USENIX Annual Technical Conference (USENIX ATC 22)","author":"Choi Seungbeom","year":"2022","unstructured":"Seungbeom Choi, Sunho Lee, Yeonjae Kim, Jongse Park, Youngjin Kwon, and Jaehyuk Huh. 2022. Serving Heterogeneous Machine Learning Models on Multi-GPU Servers with Spatio-Temporal Sharing. In 2022 USENIX Annual Technical Conference (USENIX ATC 22). 199\u2013216."},{"key":"e_1_3_2_1_3_1","volume-title":"KRISP: Enabling Kernel-wise Right-sizing for Spatial Partitioned GPU Inference Servers. In 2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE.","author":"Chow Marcus","year":"2023","unstructured":"Marcus Chow, Ali Jahanshahi, and Daniel Wong. 2023. KRISP: Enabling Kernel-wise Right-sizing for Spatial Partitioned GPU Inference Servers. In 2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421284"},{"volume-title":"ZeroMQ: messaging for many applications. \" O\u2019Reilly Media","author":"Hintjens Pieter","key":"e_1_3_2_1_5_1","unstructured":"Pieter Hintjens. 2013. ZeroMQ: messaging for many applications. \" O\u2019Reilly Media, Inc.\"."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2020.3023723"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530510"},{"key":"e_1_3_2_1_8_1","volume-title":"International Conference on Machine Learning. PMLR, 5731\u20135741","author":"Kosaian Jack","year":"2021","unstructured":"Jack Kosaian, Amar Phanishayee, Matthai Philipose, Debadeepta Dey, and Rashmi Vinayak. 2021. Boosting the throughput and accelerator utilization of specialized cnn inference beyond increasing batch size. In International Conference on Machine Learning. PMLR, 5731\u20135741."},{"key":"e_1_3_2_1_9_1","unstructured":"Nvidia. 2023. Triton Inference Server. https:\/\/docs.nvidia.com\/deeplearning\/triton-inference-server\/user-guide\/docs\/index.html."},{"key":"e_1_3_2_1_10_1","unstructured":"SWIMProjectUCB. 2013. SWIM Project. https:\/\/github.com\/SWIMProjectUCB\/SWIM\/wiki"},{"key":"e_1_3_2_1_11_1","unstructured":"Fuxun Yu Di Wang Longfei Shangguan Minjia Zhang Chenchen Liu and Xiang Chen. 2022. A Survey of Multi-Tenant Deep Learning Inference on GPU. arxiv:2203.09040\u00a0[cs.DC]"}],"event":{"name":"IGSC '23: THE 14th international Green and Sustainable Computing Conference","acronym":"IGSC '23","location":"Toronto ON Canada"},"container-title":["Proceedings of the 14th International Green and Sustainable Computing Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3634769.3634807","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,29]],"date-time":"2024-05-29T20:13:35Z","timestamp":1717013615000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3634769.3634807"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,28]]},"references-count":11,"alternative-id":["10.1145\/3634769.3634807","10.1145\/3634769"],"URL":"https:\/\/doi.org\/10.1145\/3634769.3634807","relation":{},"subject":[],"published":{"date-parts":[[2023,10,28]]},"assertion":[{"value":"2024-05-29","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}