{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,7,2]],"date-time":"2024-07-02T03:50:37Z","timestamp":1719892237833},"reference-count":28,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2017,8,1]],"date-time":"2017-08-01T00:00:00Z","timestamp":1501545600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2018,4,17]],"date-time":"2018-04-17T00:00:00Z","timestamp":1523923200000},"content-version":"am","delay-in-days":259,"URL":"http:\/\/www.elsevier.com\/open-access\/userlicense\/1.0\/"}],"funder":[{"DOI":"10.13039\/100000015","name":"US Department of Energy","doi-asserted-by":"publisher","award":["DE-AC02-05CH11231"],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Office of Science of the US Department of Energy","award":["DE-AC05-00OR22725"]},{"name":"DOE Office of Science User Facility","award":["DE-AC02-06CH11357"]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Journal of Parallel and Distributed Computing"],"published-print":{"date-parts":[[2017,8]]},"DOI":"10.1016\/j.jpdc.2017.02.010","type":"journal-article","created":{"date-parts":[[2017,3,8]],"date-time":"2017-03-08T19:47:23Z","timestamp":1489002443000},"page":"92-105","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":6,"special_numbering":"C","title":["Cross-scale efficient tensor contractions for coupled cluster computations through multiple programming model backends"],"prefix":"10.1016","volume":"106","author":[{"given":"Khaled Z.","family":"Ibrahim","sequence":"first","affiliation":[]},{"given":"Evgeny","family":"Epifanovsky","sequence":"additional","affiliation":[]},{"given":"Samuel","family":"Williams","sequence":"additional","affiliation":[]},{"given":"Anna I.","family":"Krylov","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.jpdc.2017.02.010_br000005","series-title":"C++ Template Metaprogramming: Concepts, Tools, and Techniques from Boost and Beyond","author":"Abrahams","year":"2004"},{"key":"10.1016\/j.jpdc.2017.02.010_br000010","series-title":"European Conference on Parallel Processing","first-page":"863","article-title":"StarPU: A unified platform for task scheduling on heterogeneous multicore architectures","author":"Augonnet","year":"2009"},{"key":"10.1016\/j.jpdc.2017.02.010_br000015","unstructured":"J. Calvin, TiledArray, a massively-parallel, block-sparse tensor library written in C++, https:\/\/github.com\/ValeevGroup\/tiledarray \u00a0(accessed on 30.05.14)."},{"key":"10.1016\/j.jpdc.2017.02.010_br000020","doi-asserted-by":"crossref","unstructured":"J. Demmel, D. Eliahu, A. Fox, S. Kamil, B. Lipshitz, O. Schwartz, O. Spillinger, Communication-optimal parallel recursive rectangular matrix multiplication, in: The IEEE International Symposium on Parallel Distributed Processing, IPDPS, 2013, pp. 261\u2013272.","DOI":"10.1109\/IPDPS.2013.80"},{"key":"10.1016\/j.jpdc.2017.02.010_br000025","author":"Epifanovsky"},{"key":"10.1016\/j.jpdc.2017.02.010_br000030","doi-asserted-by":"crossref","first-page":"2293","DOI":"10.1002\/jcc.23377","article-title":"New implementation of high-level correlated methods using a general block-tensor library for high-performance electronic structure calculations","volume":"34","author":"Epifanovsky","year":"2013","journal-title":"J. Comput. Chem."},{"key":"10.1016\/j.jpdc.2017.02.010_br000035","series-title":"High Performance Computing Systems: Performance Modeling, Benchmarking and Simulation","first-page":"281","article-title":"Performance analysis of the NWChem TCE for different communication patterns","volume":"vol. 8551","author":"Ghosh","year":"2014"},{"key":"10.1016\/j.jpdc.2017.02.010_br000040","doi-asserted-by":"crossref","unstructured":"D. Hackenberg, R. Schne, T. Ilsche, D. Molka, J. Schuchart, R. Geyer, An energy efficiency feature survey of the intel haswell processor, in: The IEEE International Parallel and Distributed Processing Symposium Workshop, IPDPSW, 2015, pp. 896\u2013904.","DOI":"10.1109\/IPDPSW.2015.70"},{"issue":"46","key":"10.1016\/j.jpdc.2017.02.010_br000045","doi-asserted-by":"crossref","first-page":"9887","DOI":"10.1021\/jp034596z","article-title":"Tensor contraction engine: abstraction and automated parallel implementation of configuration-interaction, coupled-cluster, and many-body perturbation theories","volume":"107","author":"Hirata","year":"2003","journal-title":"J. Phys. Chem. A"},{"key":"10.1016\/j.jpdc.2017.02.010_br000050","unstructured":"M. Horowitz, T. Indermaur, R. Gonzalez, Low-power digital design, in: IEEE Symposium Low Power Electronics, Digest of Technical Papers. 1994, pp. 8\u201311."},{"issue":"4es","key":"10.1016\/j.jpdc.2017.02.010_br000055","doi-asserted-by":"crossref","first-page":"196","DOI":"10.1145\/242224.242477","article-title":"Building domain-specific embedded languages","volume":"28","author":"Hudak","year":"1996","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"10.1016\/j.jpdc.2017.02.010_br000060","doi-asserted-by":"crossref","unstructured":"K. Ibrahim, S. Williams, E. Epifanovsky, A. Krylov, Analysis and tuning of libtensor framework on multicore architectures, in: The International Conference on High Performance Computing, HiPC, 2014, pp. 1\u201310.","DOI":"10.1109\/HiPC.2014.7116881"},{"issue":"9","key":"10.1016\/j.jpdc.2017.02.010_br000065","doi-asserted-by":"crossref","first-page":"1017","DOI":"10.1016\/j.jpdc.2004.03.021","article-title":"Communication lower bounds for distributed-memory matrix multiplication","volume":"64","author":"Irony","year":"2004","journal-title":"J. Parallel Distrib. Comput."},{"key":"10.1016\/j.jpdc.2017.02.010_br000070","series-title":"The 8th International Conference on Partitioned Global Address Space Programming Models","first-page":"6:1","article-title":"Hpx: A task based programming model in a global address space","author":"Kaiser","year":"2014"},{"key":"10.1016\/j.jpdc.2017.02.010_br000075","series-title":"CHARM++: A Portable Concurrent Object Oriented System Based on C++, Vol. 28","author":"Kale","year":"1993"},{"key":"10.1016\/j.jpdc.2017.02.010_br000080","unstructured":"M. Krishnan, J. Nieplocha, Srumma: a matrix multiplication algorithm suitable for clusters and scalable shared memory systems, in: The International Parallel and Distributed Processing Symposium, 2004, p. 70\u2013."},{"key":"10.1016\/j.jpdc.2017.02.010_br000085","unstructured":"X. Liu, A. Patel, E. Chow, A new scalable parallel algorithm for fock matrix construction, in: The IEEE Parallel and Distributed Processing Symposium."},{"key":"10.1016\/j.jpdc.2017.02.010_br000090","doi-asserted-by":"crossref","unstructured":"V. Lotrich, N. Flocke, M. Ponton, B.A. Sanders, E. Deumens, R.J. Bartlett, A. Perera, An infrastructure for scalable and portable parallel programs for computational chemistry, in: International Conference of Supercomputing, ICS, 2009, pp. 523\u2013524.","DOI":"10.1145\/1542275.1542361"},{"key":"10.1016\/j.jpdc.2017.02.010_br000095","unstructured":"D. Matthews, Aquarius (a parallel quantum chemistry package), https:\/\/github.com\/devinamatthews\/aquarius (accessed on 02.04.16)."},{"issue":"2","key":"10.1016\/j.jpdc.2017.02.010_br000100","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1177\/1094342006064503","article-title":"Advances, applications and performance of the global arrays shared memory programming toolkit","volume":"20","author":"Nieplocha","year":"2006","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"10.1016\/j.jpdc.2017.02.010_br000105","doi-asserted-by":"crossref","first-page":"184","DOI":"10.1080\/00268976.2014.952696","article-title":"Advances in molecular quantum chemistry contained in the Q-Chem 4 program package","volume":"113","author":"Shao","year":"2015","journal-title":"Mol. Phys."},{"key":"10.1016\/j.jpdc.2017.02.010_br000110","doi-asserted-by":"crossref","unstructured":"E. Solomonik, J. Demmel, Communication-optimal parallel 2.5D matrix multiplication and LU factorization algorithms, in: Euro-Par 2011 Parallel Processing, vol. 6853, 2011, pp. 90\u2013109. http:\/\/dx.doi.org\/10.1007\/978-3-642-23397-5_10.","DOI":"10.1007\/978-3-642-23397-5_10"},{"key":"10.1016\/j.jpdc.2017.02.010_br000115","doi-asserted-by":"crossref","unstructured":"E. Solomonik, D. Matthews, J. Hammond, J. Demmel, Cyclops tensor framework: Reducing communication and eliminating load imbalance in massively parallel contractions, in: The IEEE 27th International Symposium on Parallel Distributed Processing, IPDPS, 2013, pp. 813\u2013824. http:\/\/dx.doi.org\/10.1109\/IPDPS.2013.112.","DOI":"10.1109\/IPDPS.2013.112"},{"issue":"12","key":"10.1016\/j.jpdc.2017.02.010_br000120","doi-asserted-by":"crossref","first-page":"3176","DOI":"10.1016\/j.jpdc.2014.06.002","article-title":"A massively parallel tensor contraction framework for coupled-cluster computations","volume":"74","author":"Solomonik","year":"2014","journal-title":"J. Parallel Distrib. Comput."},{"key":"10.1016\/j.jpdc.2017.02.010_br000125","unstructured":"J. Stanton, J. Gauss, M. Harding, P. Szalay, A. Auer, R. Bartlett, U. Benedikt, C. Berger, D. Bernholdt, Y. Bomble, O. Christiansen, M. Heckert, O. Heun, C. Huber, T.-C. Jagau, D. Jonsson, J. Juselius, K. Klein, W. Lauderdale, D. Matthews, T. Metzroth, D. O\u2019Neill, D. Price, E. Prochnow, K. Ruud, F. Schiffmann, S. Stopkowicz, M. Varner, J. Vazquez, F. Wang, J. Watts, CFOUR, Coupled Cluster techniques for Computational Chemistry, a quantum-chemical program package (www.cfour.de)."},{"issue":"1","key":"10.1016\/j.jpdc.2017.02.010_br000130","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1007\/s10586-011-0171-x","article-title":"An overview of energy efficiency techniques in cluster computing systems","volume":"16","author":"Valentini","year":"2013","journal-title":"Cluster Comput."},{"issue":"9","key":"10.1016\/j.jpdc.2017.02.010_br000135","doi-asserted-by":"crossref","first-page":"1477","DOI":"10.1016\/j.cpc.2010.04.018","article-title":"Nwchem: A comprehensive and scalable open-source solution for large scale molecular simulations","volume":"181","author":"Valiev","year":"2010","journal-title":"Comput. Phys. Comm."},{"key":"10.1016\/j.jpdc.2017.02.010_br000140","series-title":"Summa: Scalable universal matrix multiplication algorithm, Tech. Rep.","author":"Van de Geijn","year":"1995"}],"container-title":["Journal of Parallel and Distributed Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0743731517300783?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0743731517300783?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2023,7,21]],"date-time":"2023-07-21T01:15:52Z","timestamp":1689902152000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0743731517300783"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,8]]},"references-count":28,"alternative-id":["S0743731517300783"],"URL":"https:\/\/doi.org\/10.1016\/j.jpdc.2017.02.010","relation":{},"ISSN":["0743-7315"],"issn-type":[{"value":"0743-7315","type":"print"}],"subject":[],"published":{"date-parts":[[2017,8]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Cross-scale efficient tensor contractions for coupled cluster computations through multiple programming model backends","name":"articletitle","label":"Article Title"},{"value":"Journal of Parallel and Distributed Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.jpdc.2017.02.010","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2017 Elsevier Inc. All rights reserved.","name":"copyright","label":"Copyright"}]}}