{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,7,2]],"date-time":"2024-07-02T03:53:36Z","timestamp":1719892416850},"reference-count":46,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Journal of Parallel and Distributed Computing"],"published-print":{"date-parts":[[2023,1]]},"DOI":"10.1016\/j.jpdc.2022.09.003","type":"journal-article","created":{"date-parts":[[2022,9,14]],"date-time":"2022-09-14T12:22:08Z","timestamp":1663158128000},"page":"14-23","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Forseti: Dynamic chunk-level reshaping for data processing on heterogeneous clusters"],"prefix":"10.1016","volume":"171","author":[{"given":"Sultan","family":"Alamro","sequence":"first","affiliation":[]},{"given":"Tian","family":"Lan","sequence":"additional","affiliation":[]},{"given":"Suresh","family":"Subramaniam","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.jpdc.2022.09.003_br0010","doi-asserted-by":"crossref","first-page":"61","DOI":"10.1145\/2189750.2150984","article-title":"Tarazu: optimizing mapreduce on heterogeneous clusters","volume":"40","author":"Ahmad","year":"2012","journal-title":"Comput. Archit. News"},{"key":"10.1016\/j.jpdc.2022.09.003_br0020","series-title":"2020 IEEE 26th International Conference on Parallel and Distributed Systems (ICPADS)","first-page":"9","article-title":"Improved mapreduce load balancing through distribution-dependent hash function optimization","author":"Ahmad","year":"2020"},{"key":"10.1016\/j.jpdc.2022.09.003_br0030","doi-asserted-by":"crossref","DOI":"10.1145\/3199524.3199564","article-title":"Straggler mitigation by delayed relaunch of tasks","author":"Aktas","year":"2018","journal-title":"SIGMETRICS Perform. Eval. Rev."},{"key":"10.1016\/j.jpdc.2022.09.003_br0040","series-title":"2018 IEEE International Conference on Communications (ICC)","article-title":"Shed: optimal dynamic cloning to meet application deadlines in cloud","author":"Alamro","year":"2018"},{"key":"10.1016\/j.jpdc.2022.09.003_br0050","doi-asserted-by":"crossref","first-page":"1515","DOI":"10.1109\/TNSM.2020.2986477","article-title":"Shed+: optimal dynamic speculation to meet application deadlines in cloud","volume":"17","author":"Alamro","year":"2020","journal-title":"IEEE Trans. Netw. Serv. Manag."},{"key":"10.1016\/j.jpdc.2022.09.003_br0060","series-title":"14th {USENIX} Symposium on Networked Systems Design and Implementation ({NSDI} 17)","first-page":"469","article-title":"Cherrypick: adaptively unearthing the best cloud configurations for big data analytics","author":"Alipourfard","year":"2017"},{"key":"10.1016\/j.jpdc.2022.09.003_br0070","series-title":"Proceedings of the Sixth Conference on Computer Systems","first-page":"287","article-title":"Scarlett: coping with skewed content popularity in mapreduce clusters","author":"Ananthanarayanan","year":"2011"},{"key":"10.1016\/j.jpdc.2022.09.003_br0080","series-title":"NSDI'13","article-title":"Effective straggler mitigation: attack of the clones","author":"Ananthanarayanan","year":"2013"},{"key":"10.1016\/j.jpdc.2022.09.003_br0090","series-title":"OSDI'10","article-title":"Reining in the outliers in map-reduce clusters using mantri","author":"Ananthanarayanan","year":"2010"},{"key":"10.1016\/j.jpdc.2022.09.003_br0100","series-title":"11th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 14)","first-page":"233","article-title":"End-to-end performance isolation through virtual datacenters","author":"Angel","year":"2014"},{"key":"10.1016\/j.jpdc.2022.09.003_br0110","doi-asserted-by":"crossref","first-page":"22","DOI":"10.1016\/j.future.2014.09.001","article-title":"Mra++: scheduling and data placement on mapreduce for heterogeneous environments","volume":"42","author":"Anjos","year":"2015","journal-title":"Future Gener. Comput. Syst."},{"key":"10.1016\/j.jpdc.2022.09.003_br0120","author":"Apache Software Foundation"},{"key":"10.1016\/j.jpdc.2022.09.003_br0130","series-title":"Presented as Part of the 10th {USENIX} Symposium on Networked Systems Design and Implementation ({NSDI} 13)","first-page":"171","article-title":"Chatty tenants and the cloud network sharing problem","author":"Ballani","year":"2013"},{"key":"10.1016\/j.jpdc.2022.09.003_br0140","series-title":"2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)","first-page":"1078","article-title":"Addressing performance heterogeneity in mapreduce clusters with elastic tasks","author":"Chen","year":"2017"},{"key":"10.1016\/j.jpdc.2022.09.003_br0150","series-title":"Proceedings of the 15th International Middleware Conference","first-page":"97","article-title":"Improving mapreduce performance in heterogeneous environments with adaptive task tuning","author":"Cheng","year":"2014"},{"key":"10.1016\/j.jpdc.2022.09.003_br0160","series-title":"Proceedings of the Sixth ACM Symposium on Cloud Computing","first-page":"139","article-title":"On data skewness, stragglers, and mapreduce progress indicators","author":"Coppa","year":"2015"},{"key":"10.1016\/j.jpdc.2022.09.003_br0170","series-title":"3rd {USENIX} Workshop on Hot Topics in Edge Computing (HotEdge 20)","article-title":"Fast and efficient container startup at the edge via dependency scheduling","author":"Fu","year":"2020"},{"key":"10.1016\/j.jpdc.2022.09.003_br0180","series-title":"Presented as Part of the 2013 {USENIX} Annual Technical Conference ({USENIX}{ATC} 13)","first-page":"61","article-title":"{PIKACHU}: how to rebalance load in optimizing mapreduce on heterogeneous clusters","author":"Gandhi","year":"2013"},{"key":"10.1016\/j.jpdc.2022.09.003_br0190","series-title":"Proceedings of the 21st International Middleware Conference","first-page":"280","article-title":"Fifer: tackling resource underutilization in the serverless era","author":"Gunasekaran","year":"2020"},{"key":"10.1016\/j.jpdc.2022.09.003_br0200","series-title":"Proceedings of the 2nd ACM Symposium on Cloud Computing","first-page":"1","article-title":"No one (cluster) size fits all: automatic cluster sizing for data-intensive analytics","author":"Herodotou","year":"2011"},{"key":"10.1016\/j.jpdc.2022.09.003_br0210","series-title":"Cidr","first-page":"261","article-title":"Starfish: a self-tuning system for big data analytics","author":"Herodotou","year":"2011"},{"key":"10.1016\/j.jpdc.2022.09.003_br0220","series-title":"Cluster, Cloud and Grid Computing (CCGrid), 2012 12th IEEE\/ACM International Symposium on","first-page":"435","article-title":"Maestro: replica-aware map scheduling for mapreduce","author":"Ibrahim","year":"2012"},{"key":"10.1016\/j.jpdc.2022.09.003_br0230","series-title":"Proceedings of the 2015 ACM Conference on Special Interest Group on Data Communication","first-page":"435","article-title":"Silo: predictable message latency in the cloud","author":"Jang","year":"2015"},{"key":"10.1016\/j.jpdc.2022.09.003_br0240","series-title":"SIGCOMM'09","article-title":"The nature of data center traffic: measurements & analysis","author":"Kandula","year":"2009"},{"key":"10.1016\/j.jpdc.2022.09.003_br0250","series-title":"Proceedings of the 1st ACM Symposium on Cloud Computing","first-page":"75","article-title":"Skew-resistant parallel processing of feature-extracting scientific user-defined functions","author":"Kwon","year":"2010"},{"key":"10.1016\/j.jpdc.2022.09.003_br0260","series-title":"Open Cirrus Summit 11","article-title":"A study of skew in mapreduce applications","author":"Kwon","year":"2011"},{"key":"10.1016\/j.jpdc.2022.09.003_br0270","series-title":"Proceedings of the 2012 ACM SIGMOD International Conference on Management of Data","first-page":"25","article-title":"Skewtune: mitigating skew in mapreduce applications","author":"Kwon","year":"2012"},{"key":"10.1016\/j.jpdc.2022.09.003_br0280","series-title":"IEEE INFOCOM 2014-IEEE Conference on Computer Communications","first-page":"2004","article-title":"Online load balancing for mapreduce with skewed data input","author":"Le","year":"2014"},{"key":"10.1016\/j.jpdc.2022.09.003_br0290","doi-asserted-by":"crossref","first-page":"14","DOI":"10.1016\/j.bdr.2014.07.002","article-title":"A dynamic data placement strategy for hadoop in heterogeneous environments","volume":"1","author":"Lee","year":"2014","journal-title":"Big Data Res."},{"key":"10.1016\/j.jpdc.2022.09.003_br0300","series-title":"12th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 16)","first-page":"383","article-title":"Don't get caught in the cold, warm-up your {JVM}: understand and eliminate {JVM} warm-up overhead in data-parallel systems","author":"Lion","year":"2016"},{"issue":"2","key":"10.1016\/j.jpdc.2022.09.003_br0310","article-title":"Docker: lightweight Linux containers for consistent development and deployment","volume":"2014","author":"Merkel","year":"2014","journal-title":"Linux J."},{"key":"10.1016\/j.jpdc.2022.09.003_br0320","series-title":"12th {USENIX} Symposium on Networked Systems Design and Implementation ({NSDI} 15)","first-page":"293","article-title":"Making sense of performance in data analytics frameworks","author":"Ousterhout","year":"2015"},{"key":"10.1016\/j.jpdc.2022.09.003_br0330","series-title":"Proceedings of the Twenty-Fourth ACM Symposium on Operating Systems Principles","first-page":"69","article-title":"Sparrow: distributed, low latency scheduling","author":"Ousterhout","year":"2013"},{"key":"10.1016\/j.jpdc.2022.09.003_br0340","series-title":"11th International Conference on Autonomic Computing ({ICAC} 14)","first-page":"137","article-title":"User-centric heterogeneity-aware mapreduce job provisioning in the public cloud","author":"Pettijohn","year":"2014"},{"key":"10.1016\/j.jpdc.2022.09.003_br0350","series-title":"Proceedings of the 15th International Conference on Extending Database Technology","first-page":"420","article-title":"Adaptive mapreduce using situation-aware mappers","author":"Vernica","year":"2012"},{"key":"10.1016\/j.jpdc.2022.09.003_br0360","series-title":"2015 IEEE Conference on Computer Communications (INFOCOM)","first-page":"1328","article-title":"Actcap: accelerating mapreduce on heterogeneous clusters with capability-aware data placement","author":"Wang","year":"2015"},{"key":"10.1016\/j.jpdc.2022.09.003_br0370","first-page":"1","article-title":"Efficient straggler replication in large-scale parallel computing","volume":"4","author":"Wang","year":"2019","journal-title":"ACM Trans. Model. Perform. Eval. Comput. Syst. (TOMPECS)"},{"key":"10.1016\/j.jpdc.2022.09.003_br0380","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.peva.2015.12.002","article-title":"Data locality in mapreduce: a network perspective","volume":"96","author":"Wang","year":"2016","journal-title":"Perform. Eval."},{"key":"10.1016\/j.jpdc.2022.09.003_br0390","series-title":"2010 IEEE International Symposium on Parallel & Distributed Processing, Workshops and Phd Forum (IPDPSW)","first-page":"1","article-title":"Improving mapreduce performance through data placement in heterogeneous hadoop clusters","author":"Xie","year":"2010"},{"key":"10.1016\/j.jpdc.2022.09.003_br0400","doi-asserted-by":"crossref","first-page":"2470","DOI":"10.1109\/TC.2015.2481403","article-title":"Heterogeneity and interference-aware virtual machine provisioning for predictable performance in the cloud","volume":"65","author":"Xu","year":"2015","journal-title":"IEEE Trans. Comput."},{"key":"10.1016\/j.jpdc.2022.09.003_br0410","first-page":"530","article-title":"Optimization for speculative execution in big data processing clusters","volume":"28","author":"Xu","year":"2017","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"10.1016\/j.jpdc.2022.09.003_br0420","series-title":"Computer Communication and Networks (ICCCN), 2017 26th International Conference on","first-page":"1","article-title":"Laser: a deep learning approach for speculative execution and replication of deadline-critical jobs in cloud","author":"Xu","year":"2017"},{"key":"10.1016\/j.jpdc.2022.09.003_br0430","series-title":"2018 IEEE 38th International Conference on Distributed Computing Systems (ICDCS)","article-title":"Chronos: a unifying optimization framework for speculative execution of deadline-critical mapreduce jobs","author":"Xu","year":"2018"},{"key":"10.1016\/j.jpdc.2022.09.003_br0440","series-title":"Proceedings of the 21st International Middleware Conference","first-page":"386","article-title":"Mitigating stragglers in the decentralized training on heterogeneous clusters","author":"Yang","year":"2020"},{"key":"10.1016\/j.jpdc.2022.09.003_br0450","series-title":"OSDI'08","article-title":"Improving mapreduce performance in heterogeneous environments","author":"Zaharia","year":"2008"},{"key":"10.1016\/j.jpdc.2022.09.003_br0460","doi-asserted-by":"crossref","first-page":"38","DOI":"10.1145\/2788402.2788409","article-title":"Exploiting cloud heterogeneity to optimize performance and cost of mapreduce processing","volume":"42","author":"Zhang","year":"2015","journal-title":"ACM SIGMETRICS Perform. Eval. Rev."}],"container-title":["Journal of Parallel and Distributed Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0743731522001915?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0743731522001915?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2023,3,4]],"date-time":"2023-03-04T17:13:49Z","timestamp":1677950029000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0743731522001915"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1]]},"references-count":46,"alternative-id":["S0743731522001915"],"URL":"https:\/\/doi.org\/10.1016\/j.jpdc.2022.09.003","relation":{},"ISSN":["0743-7315"],"issn-type":[{"value":"0743-7315","type":"print"}],"subject":[],"published":{"date-parts":[[2023,1]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Forseti: Dynamic chunk-level reshaping for data processing on heterogeneous clusters","name":"articletitle","label":"Article Title"},{"value":"Journal of Parallel and Distributed Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.jpdc.2022.09.003","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2022 Elsevier Inc. All rights reserved.","name":"copyright","label":"Copyright"}]}}