{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,10,15]],"date-time":"2023-10-15T07:17:29Z","timestamp":1697354249814},"reference-count":22,"publisher":"Elsevier BV","issue":"8","license":[{"start":{"date-parts":[[2007,12,1]],"date-time":"2007-12-01T00:00:00Z","timestamp":1196467200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2013,7,17]],"date-time":"2013-07-17T00:00:00Z","timestamp":1374019200000},"content-version":"vor","delay-in-days":2055,"URL":"https:\/\/www.elsevier.com\/open-access\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Journal of Computer and System Sciences"],"published-print":{"date-parts":[[2007,12]]},"DOI":"10.1016\/j.jcss.2007.02.011","type":"journal-article","created":{"date-parts":[[2007,2,25]],"date-time":"2007-02-25T11:57:35Z","timestamp":1172404655000},"page":"1240-1251","source":"Crossref","is-referenced-by-count":3,"title":["Towards highly available and scalable high performance clusters"],"prefix":"10.1016","volume":"73","author":[{"given":"Azzedine","family":"Boukerche","sequence":"first","affiliation":[]},{"given":"Raed A.","family":"Al-Shaikh","sequence":"additional","affiliation":[]},{"given":"Mirela Sechi Moretti Annoni","family":"Notare","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.jcss.2007.02.011_bib001","unstructured":"F. Pister, L. Hess, V. Lindenstruth, Fault tolerant grid and cluster systems, Kirchhoff Institute of Physics (KIP), University Heidelberg, Germany, 2004, pp. 360\u2013363"},{"issue":"3","key":"10.1016\/j.jcss.2007.02.011_bib002","doi-asserted-by":"crossref","first-page":"353","DOI":"10.1177\/1094342004046052","article-title":"Building and using a fault tolerant MPI implementation","volume":"18","author":"Fagg","year":"2004","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"10.1016\/j.jcss.2007.02.011_bib003","doi-asserted-by":"crossref","unstructured":"I. Haddad, C. Leangsuksun, R. Libby, T. Liu, Y. Liu, S. Scott, Highly reliable Linux HPC clusters: Self-awareness approach, in: Proceedings of the 2nd International Symposium on Parallel and Distributed Processing and Applications, Hong Kong, China, December, 2004","DOI":"10.1007\/978-3-540-30566-8_27"},{"key":"10.1016\/j.jcss.2007.02.011_bib004","unstructured":"J. Mugler, T. Naugthon, S. Scott, C. Leangsuksun, OSCAR clusters, in: Proceedings of the Linux Symposium, Ottawa, Canada, 2003"},{"key":"10.1016\/j.jcss.2007.02.011_bib005","doi-asserted-by":"crossref","unstructured":"P. Lemarinier, A. Bouteiller, T. Herault, G. Krawezik, F. Cappello, Improved message logging versus improved coordinated checkpointing for fault tolerant MPI, in: Proceedings of the 6th International Conference on Cluster Computing, CLUSTER'04, San Diego, CA, USA, 2004","DOI":"10.1109\/CLUSTR.2003.1253321"},{"key":"10.1016\/j.jcss.2007.02.011_bib006","doi-asserted-by":"crossref","unstructured":"G.E. Fagg, J.J. Dongarra, FT-MPI: Fault tolerant MPI, in: Supporting Dynamic Applications in a Dynamic World, PVM\/MPI, 2000, pp. 346\u2013353","DOI":"10.1007\/3-540-45255-9_47"},{"key":"10.1016\/j.jcss.2007.02.011_bib007","doi-asserted-by":"crossref","first-page":"353","DOI":"10.1177\/1094342004046052","article-title":"Building and using fault-tolerant MPI implementation","volume":"18","author":"Fagg","year":"2004","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"10.1016\/j.jcss.2007.02.011_bib008","unstructured":"G. Stellner, 1996. CoCheck: Checkpointing and process migration for MPI, in: Proceedings of IPPS'96, The 10th International Parallel Processing Symposium, Honolulu, HI, USA, 1996, pp. 526\u2013531"},{"key":"10.1016\/j.jcss.2007.02.011_bib009","series-title":"MPI: The Complete Reference, vol. 1","author":"Snir","year":"1998"},{"key":"10.1016\/j.jcss.2007.02.011_bib010","first-page":"874","article-title":"Low-latency, Concurrent checkpointing for parallel programs","author":"Li","year":"1998","journal-title":"IEEE Trans. Parallel Distrib. Systems"},{"key":"10.1016\/j.jcss.2007.02.011_bib011","unstructured":"William Gropp, Ewing Lusk, Fault tolerance in MPI programs, in: Proceedings of the Cluster Computing and Grid Systems Conference, December 2002"},{"key":"10.1016\/j.jcss.2007.02.011_bib012","unstructured":"A. Gidenstam, B. Koldehofe, M. Papatriantafilou, P. Tsigas, Dynamic and fault-tolerant cluster management, Technical report 2005-10, Computer Science and Engineering, Chalmers University of Technology, April 2005"},{"key":"10.1016\/j.jcss.2007.02.011_bib013","first-page":"60","article-title":"Clustering software for Linux-based HPC","volume":"44","author":"Hasegawa","year":"2003","journal-title":"NEC Res. Dev. High Perform. Comput."},{"key":"10.1016\/j.jcss.2007.02.011_bib014","unstructured":"B. Polgar, Designing the reconfiguration strategies of fault tolerant servers, in: The Third European Dependable Computing Conference, Czech Republic, September, Prague, 1999"},{"key":"10.1016\/j.jcss.2007.02.011_bib015","unstructured":"P. Sobe, Fault-tolerant Web services on a computer cluster, in: Dependable Computing\u2014EDCC-3, The Third European Dependable Computing Conference, Czech Technical University in Prague, Prague, 1999"},{"key":"10.1016\/j.jcss.2007.02.011_bib016","doi-asserted-by":"crossref","first-page":"17","DOI":"10.1109\/MIC.2005.45","article-title":"Recovering Internet service sessions from operating system failures","volume":"9","author":"Sultan","year":"2005","journal-title":"IEEE Internet Comput."},{"key":"10.1016\/j.jcss.2007.02.011_bib017","unstructured":"R. Aulwes, D. Daniel, et al., Architecture of LA-MPI, a network-fault-tolerant MPI, in: Proceedings of the 18th International Parallel and Distributed Processing Symposium, 2004, pp. 26\u201330"},{"key":"10.1016\/j.jcss.2007.02.011_bib018","unstructured":"A. Azagury, D. Dolev, et al., Highly available cluster: A case study. Fault-tolerant computing, in: 24th International Symposium on Fault Tolerant Computing System, TX, USA, 1994, pp. 404\u2013413"},{"key":"10.1016\/j.jcss.2007.02.011_bib019","series-title":"8th International Symposium on High Performance Distributed Computing, HPDC-8'99","article-title":"Starfish: Fault-tolerant dynamic MPI programs on clusters of workstations","author":"Agbaria","year":"1999"},{"key":"10.1016\/j.jcss.2007.02.011_bib020","unstructured":"S. Rao, L. Alvisi, H. Vin, Egida: An extensible toolkit for low-overhead fault-tolerance, 29th International Fault-Tolerant Computing Symposium, Los Alamitos, CA, 1999, pp. 48\u201355"},{"key":"10.1016\/j.jcss.2007.02.011_bib021","series-title":"10th International Parallel Processing Symposium","first-page":"526","article-title":"CoCheck: Checkpointing and process migration for MPI","author":"Stellner","year":"2004"},{"key":"10.1016\/j.jcss.2007.02.011_bib022","doi-asserted-by":"crossref","unstructured":"Z. Chen, G. Fagg, E. Gabriel, J. Langou, T. Angskun, G. Bosilca, J. Dongarra, Fault tolerant high performance computing by a coding approach, in: Proc. of the ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, IL, USA, 2005, pp. 15\u201317","DOI":"10.1145\/1065944.1065973"}],"container-title":["Journal of Computer and System Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0022000007000165?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0022000007000165?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2019,4,24]],"date-time":"2019-04-24T22:48:06Z","timestamp":1556146086000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0022000007000165"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2007,12]]},"references-count":22,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2007,12]]}},"alternative-id":["S0022000007000165"],"URL":"https:\/\/doi.org\/10.1016\/j.jcss.2007.02.011","relation":{},"ISSN":["0022-0000"],"issn-type":[{"value":"0022-0000","type":"print"}],"subject":[],"published":{"date-parts":[[2007,12]]}}}