{"id":"https://openalex.org/W1777209000","doi":"https://doi.org/10.1109/tpds.2014.2321742","title":"Accelerating LINPACK with MPI-OpenCL on Clusters of Multi-GPU Nodes","display_name":"Accelerating LINPACK with MPI-OpenCL on Clusters of Multi-GPU Nodes","publication_year":2014,"publication_date":"2014-06-30","ids":{"openalex":"https://openalex.org/W1777209000","doi":"https://doi.org/10.1109/tpds.2014.2321742","mag":"1777209000"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2014.2321742","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103690204","display_name":"Gangwon Jo","orcid":null},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"funder","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Gangwon Jo","raw_affiliation_strings":["Center for Manycore Programming, Department of Computer Science and Engineering, Seoul National University, Seoul 151-744, Korea"],"affiliations":[{"raw_affiliation_string":"Center for Manycore Programming, Department of Computer Science and Engineering, Seoul National University, Seoul 151-744, Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018976821","display_name":"Jeongho Nah","orcid":null},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"funder","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jeongho Nah","raw_affiliation_strings":["Center for Manycore Programming, Department of Computer Science and Engineering, Seoul National University, Seoul 151-744, Korea"],"affiliations":[{"raw_affiliation_string":"Center for Manycore Programming, Department of Computer Science and Engineering, Seoul National University, Seoul 151-744, Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100442897","display_name":"Jun Lee","orcid":"https://orcid.org/0000-0003-0992-8473"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"funder","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jun Lee","raw_affiliation_strings":["Center for Manycore Programming, Department of Computer Science and Engineering, Seoul National University, Seoul 151-744, Korea"],"affiliations":[{"raw_affiliation_string":"Center for Manycore Programming, Department of Computer Science and Engineering, Seoul National University, Seoul 151-744, Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100728095","display_name":"Jungwon Kim","orcid":"https://orcid.org/0000-0001-6594-6225"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"funder","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]},{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"funder","lineage":["https://openalex.org/I139264467"]}],"countries":["KR","US"],"is_corresponding":false,"raw_author_name":"Jungwon Kim","raw_affiliation_strings":["Center for Manycore Programming, Department of Computer Science and Engineering, Seoul National University, Seoul 151-744, Korea","Oak Ridge National Laboratory, Oak Ridge, TN 37831, USA"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, Oak Ridge, TN 37831, USA","institution_ids":["https://openalex.org/I1289243028"]},{"raw_affiliation_string":"Center for Manycore Programming, Department of Computer Science and Engineering, Seoul National University, Seoul 151-744, Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100767182","display_name":"Jaejin Lee","orcid":"https://orcid.org/0000-0003-4638-8170"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"funder","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jaejin Lee","raw_affiliation_strings":["Center for Manycore Programming, Department of Computer Science and Engineering, Seoul National University, Seoul 151-744, Korea"],"affiliations":[{"raw_affiliation_string":"Center for Manycore Programming, Department of Computer Science and Engineering, Seoul National University, Seoul 151-744, Korea","institution_ids":["https://openalex.org/I139264467"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.438,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":27,"citation_normalized_percentile":{"value":0.936534,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":92,"max":93},"biblio":{"volume":"26","issue":"7","first_page":"1814","last_page":"1825"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9979,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9953,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.77481544},{"id":"https://openalex.org/keywords/gpu-cluster","display_name":"GPU cluster","score":0.6160576},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.47727913},{"id":"https://openalex.org/keywords/lu-decomposition","display_name":"LU decomposition","score":0.43716896},{"id":"https://openalex.org/keywords/symmetric-multiprocessor-system","display_name":"Symmetric multiprocessor system","score":0.41828287}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9262318},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8160207},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.77481544},{"id":"https://openalex.org/C2781335571","wikidata":"https://www.wikidata.org/wiki/Q2633544","display_name":"GPU cluster","level":3,"score":0.6160576},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.53833765},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.50288147},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.49393016},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.47727913},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.44153896},{"id":"https://openalex.org/C123213974","wikidata":"https://www.wikidata.org/wiki/Q833089","display_name":"LU decomposition","level":4,"score":0.43716896},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.4333549},{"id":"https://openalex.org/C172430144","wikidata":"https://www.wikidata.org/wiki/Q17111997","display_name":"Symmetric multiprocessor system","level":2,"score":0.41828287},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.37310416},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.21348462},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.15052518},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2014.2321742","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.49}],"grants":[{"funder":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea","award_id":"2013R1A3A2003664"}],"datasets":[],"versions":[],"referenced_works_count":14,"referenced_works":["https://openalex.org/W1552224009","https://openalex.org/W2030167104","https://openalex.org/W2044953540","https://openalex.org/W2071148743","https://openalex.org/W2097822204","https://openalex.org/W2123031400","https://openalex.org/W2142421493","https://openalex.org/W2143302336","https://openalex.org/W2162626314","https://openalex.org/W2169150754","https://openalex.org/W2169631286","https://openalex.org/W2170611190","https://openalex.org/W2170996201","https://openalex.org/W3141650078"],"related_works":["https://openalex.org/W3048701459","https://openalex.org/W3037515626","https://openalex.org/W240129890","https://openalex.org/W2389600408","https://openalex.org/W2370314112","https://openalex.org/W2364044215","https://openalex.org/W2149078538","https://openalex.org/W2080146221","https://openalex.org/W1963859303","https://openalex.org/W1912958759"],"abstract_inverted_index":{"OpenCL":[0,31],"is":[1,16,60,77],"an":[2,44],"open":[3],"standard":[4],"to":[5,18,26,34,104],"write":[6],"parallel":[7],"applications":[8,68],"for":[9,51,69],"heterogeneous":[10,37],"computing":[11,73],"systems.":[12,74],"Since":[13],"its":[14],"usage":[15],"restricted":[17],"a":[19,28,36,52],"single":[20],"operating":[21],"system":[22],"instance,":[23],"programmers":[24],"need":[25],"use":[27],"mix":[29],"of":[30,47,62,100,124],"and":[32,84,112,141],"MPI":[33],"program":[35],"cluster.":[38],"In":[39],"this":[40],"paper,":[41],"we":[42],"introduce":[43],"MPI-OpenCL":[45],"implementation":[46,76,118],"the":[48,63,86,98,106,110,113,125,129],"LINPACK":[49,58,82,117],"benchmark":[50,59,67],"cluster":[53,131],"with":[54,132],"multi-GPU":[55],"nodes.":[56],"The":[57],"one":[61],"most":[64],"widely":[65],"used":[66],"evaluating":[70],"high":[71],"performance":[72,107],"Our":[75,116],"based":[78],"on":[79,128],"High":[80],"Performance":[81],"(HPL)":[83],"uses":[85],"blocked":[87],"LU":[88],"decomposition":[89],"algorithm.":[90],"We":[91],"address":[92],"that":[93],"optimizations":[94],"aimed":[95],"at":[96],"reducing":[97],"overhead":[99],"CPUs":[101,111,140],"are":[102],"necessary":[103],"overcome":[105],"gap":[108],"between":[109],"multiple":[114],"GPUs.":[115,143],"achieves":[119],"93.69":[120],"Tflops":[121],"(46":[122],"percent":[123],"theoretical":[126],"peak)":[127],"target":[130],"49":[133],"nodes,":[134],"each":[135],"node":[136],"containing":[137],"two":[138],"eight-core":[139],"four":[142]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W1777209000","counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":7},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":1}],"updated_date":"2025-04-20T13:02:58.479988","created_date":"2016-06-24"}