{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T00:41:06Z","timestamp":1743727266459},"reference-count":13,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013,6]]},"DOI":"10.1109\/dsn.2013.6575356","type":"proceedings-article","created":{"date-parts":[[2013,8,14]],"date-time":"2013-08-14T16:18:36Z","timestamp":1376497116000},"source":"Crossref","is-referenced-by-count":69,"title":["Reading between the lines of failure logs: Understanding how HPC systems fail"],"prefix":"10.1109","author":[{"given":"Nosayba","family":"El-Sayed","sequence":"first","affiliation":[]},{"given":"Bianca","family":"Schroeder","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"13","doi-asserted-by":"publisher","DOI":"10.1109\/ISPAW.2011.55"},{"key":"11","year":"0"},{"key":"12","article-title":"A large-scale study of failures in highperformance computing systems","author":"schroeder","year":"0","journal-title":"Proc of DSN'06"},{"key":"3","doi-asserted-by":"crossref","DOI":"10.1145\/2318857.2254778","article-title":"Temperature management in data centers: Why some (might) like it hot","author":"el-sayed","year":"2012","journal-title":"Proc of SIGMETRICS"},{"key":"2","article-title":"Workload, performance, and reliability of digital computing systems","author":"castillo","year":"1981","journal-title":"Proc Int Symp Fault-Tolerant Comput"},{"key":"1","year":"0"},{"key":"10","article-title":"BlueGene\/L failure analysis and prediction models","author":"liang","year":"2006","journal-title":"Proc of DSN'06"},{"key":"7","doi-asserted-by":"publisher","DOI":"10.1145\/2150976.2150989"},{"key":"6","article-title":"Modeling and tolerating heterogeneous failures in large parallel systems","author":"heien","year":"2011","journal-title":"Proc of SC'11"},{"key":"5","doi-asserted-by":"publisher","DOI":"10.1145\/1362622.1362678"},{"key":"4","article-title":"Availability in globally distributed storage systems","author":"ford","year":"2010","journal-title":"Proc of OSDI'10"},{"key":"9","doi-asserted-by":"publisher","DOI":"10.1145\/6420.6422"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.1985.232180"}],"event":{"name":"2013 43rd Annual IEEE\/IFIP International Conference on Dependable Systems and Networks (DSN)","location":"Budapest, Hungary","start":{"date-parts":[[2013,6,24]]},"end":{"date-parts":[[2013,6,27]]}},"container-title":["2013 43rd Annual IEEE\/IFIP International Conference on Dependable Systems and Networks (DSN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6569391\/6575287\/06575356.pdf?arnumber=6575356","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,21]],"date-time":"2017-06-21T17:50:13Z","timestamp":1498067413000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6575356\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,6]]},"references-count":13,"URL":"https:\/\/doi.org\/10.1109\/dsn.2013.6575356","relation":{},"subject":[],"published":{"date-parts":[[2013,6]]}}}