{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T11:29:02Z","timestamp":1730287742836,"version":"3.28.0"},"reference-count":27,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1109\/padsw.2018.8644528","type":"proceedings-article","created":{"date-parts":[[2019,2,21]],"date-time":"2019-02-21T23:23:38Z","timestamp":1550791418000},"page":"569-576","source":"Crossref","is-referenced-by-count":1,"title":["CoLoR: Co-Located Rescuers for Fault Tolerance in HPC Systems"],"prefix":"10.1109","author":[{"given":"Zaeem","family":"Hussain","sequence":"first","affiliation":[]},{"given":"Xiaolong","family":"Cui","sequence":"additional","affiliation":[]},{"given":"Taieb","family":"Znati","sequence":"additional","affiliation":[]},{"given":"Rami","family":"Melhem","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/2503210.2503271"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/2063384.2063443"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.2172\/984082"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/UIC-ATC-ScalCom-CBDCom-IoP-SmartWorld.2016.0111"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.3100"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2004.11.016"},{"key":"ref16","article-title":"Combining process replication and checkpointing for resilience on exascale systems","author":"casanova","year":"2012","journal-title":"INRIA"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/HPCC-SmartCity-DSS.2017.71"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/966049.781513"},{"journal-title":"Mantevo home page","year":"2008","author":"heroux","key":"ref19"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2014.122"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCNC.2014.6785308"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"93","DOI":"10.1109\/CLUSTR.2004.1392606","article-title":"Ftc-charm++: an in-memory checkpoint-based fault tolerant runtime for charm++ and mpi","author":"zheng","year":"2004","journal-title":"Cluster Computing 2004 IEEE International Conference"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3959.3962"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CCGRID.2008.109"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-23397-5_6"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2012.18"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/568522.568525"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CCGRID.2010.110"},{"key":"ref1","article-title":"Exascale computing study: Technology challenges in achieving exascale systems","volume":"15","author":"bergman","year":"2008","journal-title":"Defense Advanced Research Projects Agency Information Processing Techniques Office (DARPA IPTO) Tech Rep"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.2172\/1090032"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2012.82"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/DSNW.2012.6264677"},{"key":"ref24","article-title":"Sender-based message logging","author":"zwaenepoel","year":"1987","journal-title":"Proceedings of the Seventeenth International Symposium on Fault-Tolerant Computing"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/32.666828"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2012.56"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/12.142678"}],"event":{"name":"2018 IEEE 24th International Conference on Parallel and Distributed Systems (ICPADS)","start":{"date-parts":[[2018,12,11]]},"location":"Singapore, Singapore","end":{"date-parts":[[2018,12,13]]}},"container-title":["2018 IEEE 24th International Conference on Parallel and Distributed Systems (ICPADS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8635632\/8644527\/08644528.pdf?arnumber=8644528","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,27]],"date-time":"2022-01-27T05:20:14Z","timestamp":1643260814000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8644528\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/padsw.2018.8644528","relation":{},"subject":[],"published":{"date-parts":[[2018,12]]}}}