{"id":"https://openalex.org/W2963106581","doi":"https://doi.org/10.1177/1094342018767736","title":"A scalable and extensible checkpointing scheme for massively parallel simulations","display_name":"A scalable and extensible checkpointing scheme for massively parallel simulations","publication_year":2018,"publication_date":"2018-05-02","ids":{"openalex":"https://openalex.org/W2963106581","doi":"https://doi.org/10.1177/1094342018767736","mag":"2963106581"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1177/1094342018767736","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://open.fau.de/bitstreams/1ae98097-4799-4d22-8a8c-3fc9210ce810/download","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032938194","display_name":"Nils Kohl","orcid":"https://orcid.org/0000-0003-4797-0664"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Nils Kohl","raw_affiliation_strings":["Chair for System Simulation, Friedrich-Alexander University Erlangen-N\u00fcrnberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Chair for System Simulation, Friedrich-Alexander University Erlangen-N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025935876","display_name":"Johannes H\u00f6tzer","orcid":"https://orcid.org/0000-0001-9932-0562"},"institutions":[{"id":"https://openalex.org/I70886390","display_name":"Karlsruhe University of Applied Sciences","ror":"https://ror.org/01c0m1t63","country_code":"DE","type":"education","lineage":["https://openalex.org/I70886390"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Johannes H\u00f6tzer","raw_affiliation_strings":["Institute of Materials and Processes, Karlsruhe University of Applied Sciences, Karlsruhe, Germany"],"affiliations":[{"raw_affiliation_string":"Institute of Materials and Processes, Karlsruhe University of Applied Sciences, Karlsruhe, Germany","institution_ids":["https://openalex.org/I70886390"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064942320","display_name":"Florian Schornbaum","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Florian Schornbaum","raw_affiliation_strings":["Chair for System Simulation, Friedrich-Alexander University Erlangen-N\u00fcrnberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Chair for System Simulation, Friedrich-Alexander University Erlangen-N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103067022","display_name":"Martin Bauer","orcid":"https://orcid.org/0000-0003-0272-3205"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Martin Bauer","raw_affiliation_strings":["Chair for System Simulation, Friedrich-Alexander University Erlangen-N\u00fcrnberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Chair for System Simulation, Friedrich-Alexander University Erlangen-N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001490449","display_name":"Christian Godenschwager","orcid":"https://orcid.org/0000-0002-2794-9510"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Christian Godenschwager","raw_affiliation_strings":["Chair for System Simulation, Friedrich-Alexander University Erlangen-N\u00fcrnberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Chair for System Simulation, Friedrich-Alexander University Erlangen-N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110039750","display_name":"Harald K\u00f6stler","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Harald K\u00f6stler","raw_affiliation_strings":["Chair for System Simulation, Friedrich-Alexander University Erlangen-N\u00fcrnberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Chair for System Simulation, Friedrich-Alexander University Erlangen-N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035552974","display_name":"Britta Nestler","orcid":"https://orcid.org/0000-0002-3768-3277"},"institutions":[{"id":"https://openalex.org/I70886390","display_name":"Karlsruhe University of Applied Sciences","ror":"https://ror.org/01c0m1t63","country_code":"DE","type":"education","lineage":["https://openalex.org/I70886390"]},{"id":"https://openalex.org/I102335020","display_name":"Karlsruhe Institute of Technology","ror":"https://ror.org/04t3en479","country_code":"DE","type":"education","lineage":["https://openalex.org/I102335020","https://openalex.org/I1305996414"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Britta Nestler","raw_affiliation_strings":["Institute for Applied Materials, Karlsruhe Institute of Technology, Karlsruhe, Germany","Institute of Materials and Processes, Karlsruhe University of Applied Sciences, Karlsruhe, Germany"],"affiliations":[{"raw_affiliation_string":"Institute of Materials and Processes, Karlsruhe University of Applied Sciences, Karlsruhe, Germany","institution_ids":["https://openalex.org/I70886390"]},{"raw_affiliation_string":"Institute for Applied Materials, Karlsruhe Institute of Technology, Karlsruhe, Germany","institution_ids":["https://openalex.org/I102335020"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109197540","display_name":"Ulrich R\u00fcde","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]},{"id":"https://openalex.org/I4210106946","display_name":"Centre Europ\u00e9en de Recherche et de Formation Avanc\u00e9e en Calcul Scientifique","ror":"https://ror.org/02dzbc556","country_code":"FR","type":"facility","lineage":["https://openalex.org/I4210106946"]}],"countries":["DE","FR"],"is_corresponding":false,"raw_author_name":"Ulrich R\u00fcde","raw_affiliation_strings":["Chair for System Simulation, Friedrich-Alexander University Erlangen-N\u00fcrnberg, Erlangen, Germany","Parallel Algorithms Project, CERFACS, Toulouse, France"],"affiliations":[{"raw_affiliation_string":"Chair for System Simulation, Friedrich-Alexander University Erlangen-N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]},{"raw_affiliation_string":"Parallel Algorithms Project, CERFACS, Toulouse, France","institution_ids":["https://openalex.org/I4210106946"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5032938194"],"corresponding_institution_ids":["https://openalex.org/I181369854"],"apc_list":null,"apc_paid":null,"fwci":2.567,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":20,"citation_normalized_percentile":{"value":0.839025,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":91},"biblio":{"volume":"33","issue":"4","first_page":"571","last_page":"589"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9984,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9983,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/exascale-computing","display_name":"Exascale computing","score":0.66991967},{"id":"https://openalex.org/keywords/petascale-computing","display_name":"Petascale computing","score":0.60266805},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness","score":0.57185507},{"id":"https://openalex.org/keywords/runtime-system","display_name":"Runtime system","score":0.43547174}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8505017},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.84336543},{"id":"https://openalex.org/C190475519","wikidata":"https://www.wikidata.org/wiki/Q544384","display_name":"Massively parallel","level":2,"score":0.74381894},{"id":"https://openalex.org/C2778837361","wikidata":"https://www.wikidata.org/wiki/Q2450880","display_name":"Exascale computing","level":3,"score":0.66991967},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.65301454},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.62543094},{"id":"https://openalex.org/C185410017","wikidata":"https://www.wikidata.org/wiki/Q7171778","display_name":"Petascale computing","level":3,"score":0.60266805},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.57185507},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.5616194},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.50733346},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.46069926},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4477989},{"id":"https://openalex.org/C2780870223","wikidata":"https://www.wikidata.org/wiki/Q1004415","display_name":"Runtime system","level":2,"score":0.43547174},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.41036186},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2415972},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1177/1094342018767736","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://opus4.kobv.de/opus4-fau/files/13665/10.1177_1094342018767736.pdf","pdf_url":"https://open.fau.de/bitstreams/1ae98097-4799-4d22-8a8c-3fc9210ce810/download","source":{"id":"https://openalex.org/S4306402340","display_name":"OPUS FAU (Kooperativer Bibliotheksverbund Berlin-Brandenburg (KOBV), on behalf of the Universit\u00e4tsbibliothek Erlangen-N\u00fcrnberg)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/1708.08286","pdf_url":"https://arxiv.org/pdf/1708.08286","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://opus4.kobv.de/opus4-fau/files/13665/10.1177_1094342018767736.pdf","pdf_url":"https://open.fau.de/bitstreams/1ae98097-4799-4d22-8a8c-3fc9210ce810/download","source":{"id":"https://openalex.org/S4306402340","display_name":"OPUS FAU (Kooperativer Bibliotheksverbund Berlin-Brandenburg (KOBV), on behalf of the Universit\u00e4tsbibliothek Erlangen-N\u00fcrnberg)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.48,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":59,"referenced_works":["https://openalex.org/W1190410299","https://openalex.org/W133298808","https://openalex.org/W1525214480","https://openalex.org/W1552101503","https://openalex.org/W1558516248","https://openalex.org/W1593120037","https://openalex.org/W170768260","https://openalex.org/W1762709353","https://openalex.org/W1948950449","https://openalex.org/W196759981","https://openalex.org/W1970172627","https://openalex.org/W1981432246","https://openalex.org/W1984564341","https://openalex.org/W2001689002","https://openalex.org/W2025024269","https://openalex.org/W2025166208","https://openalex.org/W2031260715","https://openalex.org/W2033656974","https://openalex.org/W2035492130","https://openalex.org/W2049132247","https://openalex.org/W2059333316","https://openalex.org/W2072072075","https://openalex.org/W2083613288","https://openalex.org/W2084126569","https://openalex.org/W2100970777","https://openalex.org/W2104097935","https://openalex.org/W2105524676","https://openalex.org/W2106821446","https://openalex.org/W2111715635","https://openalex.org/W2115711419","https://openalex.org/W2116115793","https://openalex.org/W2125488852","https://openalex.org/W2128577831","https://openalex.org/W2128854702","https://openalex.org/W2133046454","https://openalex.org/W2150345788","https://openalex.org/W2167083307","https://openalex.org/W2172500565","https://openalex.org/W2245853462","https://openalex.org/W2272228702","https://openalex.org/W2275320771","https://openalex.org/W2276814379","https://openalex.org/W2332254154","https://openalex.org/W2540977105","https://openalex.org/W2553656914","https://openalex.org/W2605487123","https://openalex.org/W2613785092","https://openalex.org/W2621825541","https://openalex.org/W2724591155","https://openalex.org/W288065879","https://openalex.org/W2962708178","https://openalex.org/W2962749287","https://openalex.org/W2964221710","https://openalex.org/W3104389750","https://openalex.org/W3121548481","https://openalex.org/W31923072","https://openalex.org/W4233783938","https://openalex.org/W4239647010","https://openalex.org/W4244600229"],"related_works":["https://openalex.org/W4289494037","https://openalex.org/W3038449658","https://openalex.org/W2889207371","https://openalex.org/W2631751282","https://openalex.org/W2278366184","https://openalex.org/W2266027327","https://openalex.org/W2249929881","https://openalex.org/W2021702679","https://openalex.org/W1582746211","https://openalex.org/W1569809235"],"abstract_inverted_index":{"Realistic":[0],"simulations":[1,94],"in":[2,5,160,182],"engineering":[3],"or":[4],"the":[6,17,31,35,85,90,123,148,169,174,183],"materials":[7],"sciences":[8,185],"can":[9,73],"consume":[10],"enormous":[11],"computing":[12],"resources":[13],"and":[14,33,68,75,87,122,171,186],"thus":[15],"require":[16,117],"use":[18],"of":[19,25,37,78,89,173],"massively":[20],"parallel":[21],"supercomputers.":[22],"The":[23,154],"probability":[24],"a":[26,64,79,119,141,161,177,188],"failure":[27],"increases":[28],"both":[29],"with":[30,34,95,176,187],"runtime":[32],"number":[36],"system":[38],"components.":[39],"For":[40],"future":[41],"exascale":[42],"systems,":[43],"it":[44],"is":[45,114,157],"therefore":[46],"considered":[47],"critical":[48],"that":[49,72],"strategies":[50],"are":[51],"developed":[52],"to":[53,97,116,131],"make":[54],"software":[55],"resilient":[56,69],"against":[57],"failures.":[58],"In":[59],"this":[60],"article,":[61],"we":[62,146],"present":[63],"scalable,":[65],"distributed,":[66],"diskless,":[67],"checkpointing":[70,125,155],"scheme":[71,126],"create":[74],"recover":[76,139],"snapshots":[77],"partitioned":[80],"simulation":[81,165,180],"domain.":[82],"We":[83,167],"demonstrate":[84,168],"efficiency":[86,170],"scalability":[88],"checkpoint":[91,112,143],"strategy":[92],"for":[93],"up":[96,130],"40":[98],"billion":[99,107],"computational":[100],"cells":[101],"executing":[102],"on":[103],"more":[104,132],"than":[105,133],"400":[106],"floating":[108],"point":[109],"values.":[110],"A":[111],"creation":[113],"shown":[115],"only":[118],"few":[120],"seconds":[121],"new":[124],"scales":[127],"almost":[128],"perfectly":[129],"260,":[134],"000":[135],"(218)":[136],"processes.":[137],"To":[138],"from":[140],"diskless":[142],"during":[144],"runtime,":[145],"realize":[147],"recovery":[149],"algorithms":[150],"using":[151],"ULFM":[152],"MPI.":[153],"mechanism":[156],"fully":[158],"integrated":[159],"state-of-the-art":[162],"high-performance":[163],"multi-physics":[164],"framework.":[166],"robustness":[172],"method":[175,191],"realistic":[178],"phase-field":[179],"originating":[181],"material":[184],"lattice":[189],"Boltzmann":[190],"implementation.":[192]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2963106581","counts_by_year":[{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":6},{"year":2018,"cited_by_count":2}],"updated_date":"2025-01-05T22:49:54.789684","created_date":"2019-07-30"}