{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,3,30]],"date-time":"2022-03-30T10:24:18Z","timestamp":1648635858686},"reference-count":23,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2016,6,3]],"date-time":"2016-06-03T00:00:00Z","timestamp":1464912000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"National Natural Science Foundation of China (CN)","award":["61303070"]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2016,11]]},"DOI":"10.1007\/s11227-016-1767-y","type":"journal-article","created":{"date-parts":[[2016,6,3]],"date-time":"2016-06-03T06:23:07Z","timestamp":1464934987000},"page":"4181-4203","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Performance modeling of hyper-scale custom machine for the principal steps in block Wiedemann algorithm"],"prefix":"10.1007","volume":"72","author":[{"given":"Tong","family":"Zhou","sequence":"first","affiliation":[]},{"given":"Jingfei","family":"Jiang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,6,3]]},"reference":[{"key":"1767_CR1","doi-asserted-by":"crossref","unstructured":"Anzt H, Tomov S, Dongarra J (2015) Energy efficiency and performance frontiers for sparse computations on GPU supercomputers. In: Proceedings of the sixth international workshop on programming models and applications for multicores and manycores, pp 1\u201310. ACM","DOI":"10.1145\/2712386.2712387"},{"key":"1767_CR2","doi-asserted-by":"crossref","unstructured":"Aoki K, Shimoyama T, Ueda H (2007) Experiments on the linear algebra step in the number field sieve. In: Atsuko M, Hiroaki K, Kai R (eds) Advances in information and computer security, pp 58\u201373. Springer, Berlin","DOI":"10.1007\/978-3-540-75651-4_5"},{"key":"1767_CR3","doi-asserted-by":"crossref","unstructured":"Awad M (2009) FPGA supercomputing platforms: a survey. In: International conference on field programmable logic and applications, 2009. FPL 2009, pp 564\u2013568. IEEE","DOI":"10.1109\/FPL.2009.5272406"},{"key":"1767_CR4","unstructured":"Baskaran MM, Bordawekar R (2008) Optimizing sparse matrix-vector multiplication on GPUs using compile-time and run-time strategies. IBM Reserach Report, RC24704 (W0812-047)"},{"key":"1767_CR5","doi-asserted-by":"crossref","unstructured":"Bulu\u00e7 A, Gilbert JR (2008) On the representation and multiplication of hypersparse matrices. In: IEEE international symposium on parallel and distributed processing, 2008. IPDPS 2008, pp 1\u201311. IEEE","DOI":"10.1109\/IPDPS.2008.4536313"},{"key":"1767_CR6","unstructured":"\u00c7ataly\u00fcrek UV, Aykanat C (2001) A fine-grain hypergraph model for 2D decomposition of sparse matrices. In: Parallel and distributed processing symposium. Proceedings 15th international, pp 1199\u20131204. IEEE"},{"key":"1767_CR7","doi-asserted-by":"crossref","unstructured":"Chen C, Du Y, Jiang H, Zuo K, Yang C (2014) HPCG: preliminary evaluation and optimization on Tianhe-2 CPU-only nodes. In: 2014 IEEE 26th international symposium on computer architecture and high performance computing (SBAC-PAD), pp 41\u201348. IEEE","DOI":"10.1109\/SBAC-PAD.2014.10"},{"issue":"205","key":"1767_CR8","first-page":"333","volume":"62","author":"D Coppersmith","year":"1994","unstructured":"Coppersmith D (1994) Solving homogeneous linear equations over GF(2) via block Wiedemann algorithm. Math Comput 62(205):333\u2013350","journal-title":"Math Comput"},{"key":"1767_CR9","doi-asserted-by":"crossref","unstructured":"Dave N, Fleming K, King M, Pellauer M, Vijayaraghavan M (2007) Hardware acceleration of matrix multiplication on a xilinx FPGA. In: 5th IEEE\/ACM international conference on formal methods and models for codesign, 2007. MEMOCODE 2007, pp 97\u2013100. IEEE","DOI":"10.1109\/MEMCOD.2007.371239"},{"key":"1767_CR10","doi-asserted-by":"crossref","unstructured":"Dordopulo AI, Levin II, Doronchenko YI, Raskladkin MK (2015) High-performance reconfigurable computer systems based on virtex FPGAs. In: Victor M (ed) Parallel computing technologies, pp 349\u2013362. Springer, Berlin","DOI":"10.1007\/978-3-319-21909-7_35"},{"key":"1767_CR11","doi-asserted-by":"crossref","unstructured":"Dou Y, Vassiliadis S, Kuzmanov G, Gaydadjiev G (2005) 64-bit floating-point FPGA matrix multiplication. In: FPGA, pp 86\u201395. ACM, New York","DOI":"10.1145\/1046192.1046204"},{"key":"1767_CR12","doi-asserted-by":"crossref","unstructured":"G\u00fcneysu T, Paar C, Pfeiffer G, Schimmler M (2008) Enhancing copacobana for advanced applications in cryptography and cryptanalysis. In: International conference on field programmable logic and applications, 2008. FPL 2008, pp 675\u2013678. IEEE","DOI":"10.1109\/FPL.2008.4630037"},{"issue":"3","key":"1767_CR13","first-page":"76","volume":"2","author":"N Kapre","year":"2015","unstructured":"Kapre N, Moorthy P (2015) A case for embedded FPGA-based socs in energy-efficient acceleration of graph problems. Supercomput Front Innov 2(3):76\u201386","journal-title":"Supercomput Front Innov"},{"key":"1767_CR14","doi-asserted-by":"crossref","unstructured":"Kimball D, Michel E, Keltcher P, Wolf MM (2014) Quantifying the effect of matrix structure on multithreaded performance of the SPMV kernel. In: High performance extreme computing conference (HPEC), 2014 IEEE, pp 1\u20136. IEEE","DOI":"10.1109\/HPEC.2014.7040991"},{"key":"1767_CR15","doi-asserted-by":"crossref","unstructured":"Langr D, Tvrdik P (2015) Evaluation criteria for sparse matrix storage formats. IEEE Trans Parallel Distrib Syst 27(2):428\u2013440","DOI":"10.1109\/TPDS.2015.2401575"},{"key":"1767_CR16","doi-asserted-by":"crossref","unstructured":"Meintanis D, Papaefstathiou I (2009) A module-based partial reconfiguration design for solving sparse linear systems over GF (2). In: International conference on field-programmable technology, 2009. FPT 2009, pp 335\u2013338. IEEE","DOI":"10.1109\/FPT.2009.5377677"},{"key":"1767_CR17","unstructured":"Pichel JC, Lorenzo JA, Heras DB, Cabaleiro JC (2009) Evaluating sparse matrix-vector product on the finisterrae supercomputer. In: 9th international conference on computational and mathematical methods in science and engineering, pp 831\u2013842"},{"key":"1767_CR18","doi-asserted-by":"crossref","unstructured":"Rajovic N, Carpenter PM, Gelado I, Puzovic N, Ramirez A, Valero M (2013) Supercomputing with commodity CPUs: are mobile SoCs ready for HPC? In: 2013 international conference for high performance computing, networking, storage and analysis (SC), pp 1\u201312. IEEE","DOI":"10.1145\/2503210.2503281"},{"issue":"4","key":"1767_CR19","doi-asserted-by":"crossref","first-page":"586","DOI":"10.1002\/cpe.2896","volume":"25","author":"B Schmidt","year":"2013","unstructured":"Schmidt B, Aribowo H, Dang HV (2013) Iterative sparse matrix-vector multiplication for accelerating the block Wiedemann algorithm over GF (2) on multi-graphics processing unit systems. Concurr Comput Pract Exp 25(4):586\u2013603","journal-title":"Concurr Comput Pract Exp"},{"key":"1767_CR20","doi-asserted-by":"crossref","unstructured":"Sedaghati N, Ashari A, Pouchet LN, Parthasarathy S, Sadayappan P (2015) Characterizing dataset dependence for sparse matrix-vector multiplication on GPUs. In: Proceedings of the 2nd workshop on parallel programming for analytics applications, pp 17\u201324. ACM","DOI":"10.1145\/2726935.2726941"},{"key":"1767_CR21","doi-asserted-by":"crossref","unstructured":"Stanisic L, Videau B, Cronsioe J, Degomme A, Marangozova-Martin V, Legrand A, M\u00e9haut JF (2013) Performance analysis of HPC applications on low-power embedded platforms. In: Proceedings of the conference on design, automation and test in Europe, pp 475\u2013480. EDA Consortium","DOI":"10.7873\/DATE.2013.106"},{"key":"1767_CR22","doi-asserted-by":"crossref","unstructured":"Thom\u00e9 E (2001) Fast computation of linear generators for matrix sequences and application to the block Wiedemann algorithm. In: Proceedings of the 2001 international symposium on symbolic and algebraic computation, pp 323\u2013331. ACM","DOI":"10.1145\/384101.384145"},{"issue":"1","key":"1767_CR23","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1137\/S0036144502409019","volume":"47","author":"B Vastenhouw","year":"2005","unstructured":"Vastenhouw B, Bisseling RH (2005) A two-dimensional data distribution method for parallel sparse matrix-vector multiplication. SIAM Rev 47(1):67\u201395","journal-title":"SIAM Rev"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-016-1767-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-016-1767-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-016-1767-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,9]],"date-time":"2019-09-09T02:55:37Z","timestamp":1567997737000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-016-1767-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,6,3]]},"references-count":23,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2016,11]]}},"alternative-id":["1767"],"URL":"https:\/\/doi.org\/10.1007\/s11227-016-1767-y","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,6,3]]}}}