{"id":"https://openalex.org/W4368353274","doi":"https://doi.org/10.1145/3578356.3592578","title":"Reconciling High Accuracy, Cost-Efficiency, and Low Latency of Inference Serving Systems","display_name":"Reconciling High Accuracy, Cost-Efficiency, and Low Latency of Inference Serving Systems","publication_year":2023,"publication_date":"2023-05-04","ids":{"openalex":"https://openalex.org/W4368353274","doi":"https://doi.org/10.1145/3578356.3592578"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3578356.3592578","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"proceedings-article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2304.10892","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091376011","display_name":"Mehran Salmani","orcid":"https://orcid.org/0009-0008-6362-0967"},"institutions":[{"id":"https://openalex.org/I67009956","display_name":"Iran University of Science and Technology","ror":"https://ror.org/01jw2p796","country_code":"IR","type":"education","lineage":["https://openalex.org/I67009956"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Mehran Salmani","raw_affiliation_strings":["Iran University of Science and Technology, Tehran, Iran"],"affiliations":[{"raw_affiliation_string":"Iran University of Science and Technology, Tehran, Iran","institution_ids":["https://openalex.org/I67009956"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019689955","display_name":"Saeid Ghafouri","orcid":"https://orcid.org/0000-0003-3799-5702"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Saeid Ghafouri","raw_affiliation_strings":["Queen Mary University of London, London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Queen Mary University of London, London, United Kingdom","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112907002","display_name":"Alireza Sanaee","orcid":null},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Alireza Sanaee","raw_affiliation_strings":["Queen Mary University of London, London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Queen Mary University of London, London, United Kingdom","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045974365","display_name":"Kamran Razavi","orcid":"https://orcid.org/0000-0002-3232-5657"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technical University of Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Kamran Razavi","raw_affiliation_strings":["Technical University of Darmstadt, Darmstadt, Germany"],"affiliations":[{"raw_affiliation_string":"Technical University of Darmstadt, Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021712397","display_name":"Max M\u00fchlh\u00e4user","orcid":"https://orcid.org/0000-0003-4713-5327"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technical University of Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Max M\u00fchlh\u00e4user","raw_affiliation_strings":["Technical University of Darmstadt, Darmstadt, Germany"],"affiliations":[{"raw_affiliation_string":"Technical University of Darmstadt, Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002561390","display_name":"Joseph Doyle","orcid":"https://orcid.org/0000-0003-1840-9616"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Joseph Doyle","raw_affiliation_strings":["Queen Mary University of London, London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Queen Mary University of London, London, United Kingdom","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064540250","display_name":"Pooyan Jamshidi","orcid":"https://orcid.org/0000-0002-9342-0703"},"institutions":[{"id":"https://openalex.org/I155781252","display_name":"University of South Carolina","ror":"https://ror.org/02b6qw903","country_code":"US","type":"education","lineage":["https://openalex.org/I155781252"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pooyan Jamshidi","raw_affiliation_strings":["University of South Carolina, Columbia, USA"],"affiliations":[{"raw_affiliation_string":"University of South Carolina, Columbia, USA","institution_ids":["https://openalex.org/I155781252"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069663779","display_name":"Mohsen Sharifi","orcid":"https://orcid.org/0000-0003-4992-2500"},"institutions":[{"id":"https://openalex.org/I67009956","display_name":"Iran University of Science and Technology","ror":"https://ror.org/01jw2p796","country_code":"IR","type":"education","lineage":["https://openalex.org/I67009956"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Mohsen Sharifi","raw_affiliation_strings":["Iran University of Science and Technology, Tehran, Iran"],"affiliations":[{"raw_affiliation_string":"Iran University of Science and Technology, Tehran, Iran","institution_ids":["https://openalex.org/I67009956"]}]}],"institution_assertions":[],"countries_distinct_count":4,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":11,"citation_normalized_percentile":{"value":0.999714,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"78","last_page":"86"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9989,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9989,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.9961,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9961,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cost-reduction","display_name":"Cost reduction","score":0.41463196},{"id":"https://openalex.org/keywords/resource-efficiency","display_name":"Resource Efficiency","score":0.4142945}],"concepts":[{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.84840566},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.8046074},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.786476},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4662743},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.4341963},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.42258772},{"id":"https://openalex.org/C2778820799","wikidata":"https://www.wikidata.org/wiki/Q3454688","display_name":"Cost reduction","level":2,"score":0.41463196},{"id":"https://openalex.org/C2777958785","wikidata":"https://www.wikidata.org/wiki/Q17120940","display_name":"Resource efficiency","level":2,"score":0.4142945},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.22817075},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.22068849},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.076236635},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3578356.3592578","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2304.10892","pdf_url":"https://arxiv.org/pdf/2304.10892","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://qmro.qmul.ac.uk/xmlui/handle/123456789/85868","pdf_url":"https://qmro.qmul.ac.uk/xmlui/bitstream/123456789/85868/2/Doyle%20Reconciling%20High%20Accuracy%202023%20Accepted.pdf","source":{"id":"https://openalex.org/S4306400530","display_name":"Queen Mary Research Online (Queen Mary University of London)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I166337079","host_organization_name":"Queen Mary University of London","host_organization_lineage":["https://openalex.org/I166337079"],"host_organization_lineage_names":["Queen Mary University of London"],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2304.10892","pdf_url":"https://arxiv.org/pdf/2304.10892","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.47,"id":"https://metadata.un.org/sdg/8"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":11,"referenced_works":["https://openalex.org/W2075233755","https://openalex.org/W2734941459","https://openalex.org/W2772948367","https://openalex.org/W2888975141","https://openalex.org/W2982157693","https://openalex.org/W3023238978","https://openalex.org/W3129469040","https://openalex.org/W3209166877","https://openalex.org/W3210776666","https://openalex.org/W4283727273","https://openalex.org/W95608104"],"related_works":["https://openalex.org/W9168048","https://openalex.org/W4376480820","https://openalex.org/W4308600690","https://openalex.org/W4300849822","https://openalex.org/W4286899009","https://openalex.org/W3205411230","https://openalex.org/W3176411177","https://openalex.org/W3155891479","https://openalex.org/W3128807919","https://openalex.org/W3029351463"],"abstract_inverted_index":{"The":[0],"use":[1],"of":[2,31,65,86,104],"machine":[3],"learning":[4],"(ML)":[5],"inference":[6,14],"for":[7],"various":[8],"applications":[9],"is":[10,71],"growing":[11],"drastically.":[12],"ML":[13,87],"services":[15,27],"engage":[16],"with":[17,90],"users":[18],"directly,":[19],"requiring":[20],"fast":[21],"and":[22,68,106,112,117],"accurate":[23],"responses.":[24],"Moreover,":[25],"these":[26,76],"face":[28],"dynamic":[29,59],"workloads":[30,60],"requests,":[32],"imposing":[33],"changes":[34],"in":[35,45],"their":[36,91],"computing":[37,42,55],"resources.":[38,56],"Failing":[39],"to":[40,58,75,94,115,121],"right-size":[41],"resources":[43],"results":[44],"either":[46],"latency":[47,96],"service":[48],"level":[49],"objectives":[50],"(SLOs)":[51],"violations":[52],"or":[53],"wasted":[54],"Adapting":[57],"considering":[61],"all":[62],"the":[63],"pillars":[64],"accuracy,":[66],"latency,":[67],"resource":[69,92],"cost":[70],"challenging.":[72],"In":[73],"response":[74],"challenges,":[77],"we":[78],"propose":[79],"InfAdapter,":[80],"which":[81],"proactively":[82],"selects":[83],"a":[84,122],"set":[85],"model":[88],"variants":[89],"allocations":[93],"meet":[95],"SLO":[97,110],"while":[98],"maximizing":[99],"an":[100],"objective":[101],"function":[102],"composed":[103],"accuracy":[105],"cost.":[107],"InfAdapter":[108],"decreases":[109],"violation":[111],"costs":[113],"up":[114],"65%":[116],"33%,":[118],"respectively,":[119],"compared":[120],"popular":[123],"industry":[124],"autoscaler":[125],"(Kubernetes":[126],"Vertical":[127],"Pod":[128],"Autoscaler).":[129]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4368353274","counts_by_year":[{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":5}],"updated_date":"2024-12-29T19:14:47.893830","created_date":"2023-05-05"}