{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T22:11:04Z","timestamp":1729635064407,"version":"3.28.0"},"reference-count":29,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012,6]]},"DOI":"10.1109\/dsnw.2012.6264675","type":"proceedings-article","created":{"date-parts":[[2012,8,17]],"date-time":"2012-08-17T15:49:12Z","timestamp":1345218552000},"page":"1-6","source":"Crossref","is-referenced-by-count":9,"title":["On the complexity of scheduling checkpoints for computational workflows"],"prefix":"10.1109","author":[{"given":"Yves","family":"Robert","sequence":"first","affiliation":[]},{"given":"Frederic","family":"Vivien","sequence":"additional","affiliation":[]},{"given":"Dounia","family":"Zaidouni","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"19","article-title":"Complexity analysis of checkpoint scheduling with variable costs","author":"bouguerra","year":"2012","journal-title":"Computers"},{"journal-title":"Introduction to Algorithms","year":"2001","author":"cormen","key":"17"},{"key":"18","article-title":"Scheduling parallel tasks approximation algorithms","author":"franc?ois dutot","year":"2004","journal-title":"Handbook of Scheduling"},{"key":"15","article-title":"Using group replication for resilience on exascale systems","author":"bougeret","year":"2012","journal-title":"INRIA"},{"journal-title":"Computers and Intractability A Guide to the Theory of NP-Completeness","year":"1979","author":"garey","key":"16"},{"key":"13","doi-asserted-by":"publisher","DOI":"10.1145\/1465482.1465560"},{"key":"14","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898719642"},{"key":"11","first-page":"206","article-title":"A flexible checkpoint\/restart model in distributed systems","volume":"6067","author":"bouguerra","year":"2010","journal-title":"PPAM ser LNCS"},{"key":"12","doi-asserted-by":"crossref","DOI":"10.1145\/2063384.2063428","article-title":"Checkpointing strategies for parallel jobs","author":"bougeret","year":"2011","journal-title":"Proc SC'2011 Int Conf for High Performance Computing Networking Storage and Analysis"},{"key":"21","doi-asserted-by":"publisher","DOI":"10.1145\/361147.361115"},{"key":"20","doi-asserted-by":"publisher","DOI":"10.1109\/CCGRID.2010.71"},{"key":"22","doi-asserted-by":"publisher","DOI":"10.1145\/1851476.1851509"},{"key":"23","first-page":"2690","article-title":"Analysis of dependencies of checkpoint cost and checkpoint interval of fault tolerant MPI applications","volume":"2","author":"venkatesh","year":"2010","journal-title":"Analysis"},{"key":"24","doi-asserted-by":"publisher","DOI":"10.1145\/190.357398"},{"key":"25","first-page":"280","article-title":"Bi-objective scheduling algorithms for optimizing makespan and reliability on heterogeneous systems","author":"dongarra","year":"2007","journal-title":"Annual ACM Symposium on Parallelism in Algorithms and Architectures"},{"key":"26","doi-asserted-by":"publisher","DOI":"10.1109\/71.993209"},{"key":"27","doi-asserted-by":"crossref","first-page":"326","DOI":"10.1016\/j.jpdc.2008.11.002","article-title":"Reliability versus performance for critical applications","volume":"69","author":"girault","year":"2009","journal-title":"J Parallel Distributed Computing"},{"key":"28","article-title":"Using replication and checkpointing for reliable task management in computational grids","author":"yi","year":"2010","journal-title":"Proc of the International Conference on High Performance Computing & Simulation"},{"key":"29","article-title":"Evaluating the viability of process replication reliability for exascale systems","author":"ferreira","year":"2011","journal-title":"Proceedings of the 2011 ACM\/IEEE Conference on Supercomputing"},{"key":"3","doi-asserted-by":"publisher","DOI":"10.1109\/HCW.2000.843736"},{"journal-title":"DataCutter Project Middleware for Filtering Large Archival Scientific Datasets in a Grid Environment","year":"0","key":"2"},{"key":"10","doi-asserted-by":"crossref","DOI":"10.1145\/2063384.2063444","article-title":"Modeling and tolerating heterogeneous failures in large parallel systems","author":"heien","year":"2011","journal-title":"Proc SC'2011 Int Conf for High Performance Computing Networking Storage and Analysis"},{"key":"1","first-page":"381","article-title":"Software rejuvenation: Analysis, module and applications","author":"kolettis","year":"1995","journal-title":"FTCS'95"},{"key":"7","doi-asserted-by":"publisher","DOI":"10.1145\/511399.511362"},{"key":"6","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2004.11.016"},{"key":"5","doi-asserted-by":"publisher","DOI":"10.1016\/0020-0190(83)90093-5"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1109\/ICPADS.2008.40"},{"key":"9","first-page":"1","article-title":"An optimal checkpoint\/restart model for a large scale high performance computing system","author":"liu","year":"2008","journal-title":"IPDPS 2008"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2006.5"}],"event":{"name":"2012 IEEE\/IFIP 42nd International Conference on Dependable Systems and Networks Workshops (DSN-W)","start":{"date-parts":[[2012,6,25]]},"location":"Boston, MA, USA","end":{"date-parts":[[2012,6,28]]}},"container-title":["IEEE\/IFIP International Conference on Dependable Systems and Networks Workshops (DSN 2012)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/6255871\/6264647\/06264675.pdf?arnumber=6264675","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,20]],"date-time":"2017-06-20T18:59:20Z","timestamp":1497985160000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6264675\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,6]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/dsnw.2012.6264675","relation":{},"subject":[],"published":{"date-parts":[[2012,6]]}}}