{"id":"https://openalex.org/W1934320078","doi":"https://doi.org/10.1109/cluster.2015.112","title":"Hybrid Communication with TCA and InfiniBand on a Parallel Programming Language XcalableACC for GPU Clusters","display_name":"Hybrid Communication with TCA and InfiniBand on a Parallel Programming Language XcalableACC for GPU Clusters","publication_year":2015,"publication_date":"2015-09-01","ids":{"openalex":"https://openalex.org/W1934320078","doi":"https://doi.org/10.1109/cluster.2015.112","mag":"1934320078"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/cluster.2015.112","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085261318","display_name":"Tetsuya Odajima","orcid":null},"institutions":[{"id":"https://openalex.org/I146399215","display_name":"University of Tsukuba","ror":"https://ror.org/02956yf07","country_code":"JP","type":"funder","lineage":["https://openalex.org/I146399215"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tetsuya Odajima","raw_affiliation_strings":["Tsukuba Daigaku, Tsukuba, Ibaraki, JP"],"affiliations":[{"raw_affiliation_string":"Tsukuba Daigaku, Tsukuba, Ibaraki, JP","institution_ids":["https://openalex.org/I146399215"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020716792","display_name":"Taisuke Boku","orcid":"https://orcid.org/0000-0001-8730-2228"},"institutions":[{"id":"https://openalex.org/I146399215","display_name":"University of Tsukuba","ror":"https://ror.org/02956yf07","country_code":"JP","type":"funder","lineage":["https://openalex.org/I146399215"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Taisuke Boku","raw_affiliation_strings":["Tsukuba Daigaku, Tsukuba, Ibaraki, JP"],"affiliations":[{"raw_affiliation_string":"Tsukuba Daigaku, Tsukuba, Ibaraki, JP","institution_ids":["https://openalex.org/I146399215"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082103530","display_name":"Toshihiro Hanawa","orcid":"https://orcid.org/0000-0002-2970-6037"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"funder","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Toshihiro Hanawa","raw_affiliation_strings":["Tokyo Daigaku, Bunkyo-ku, Tokyo, JP"],"affiliations":[{"raw_affiliation_string":"Tokyo Daigaku, Bunkyo-ku, Tokyo, JP","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103277015","display_name":"Hitoshi Murai","orcid":"https://orcid.org/0000-0002-6239-3968"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hitoshi Murai","raw_affiliation_strings":["RIKEN Advanced Institute for Computational Science"],"affiliations":[{"raw_affiliation_string":"RIKEN Advanced Institute for Computational Science","institution_ids":["https://openalex.org/I4210129730"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089793884","display_name":"Masahiro Nakao","orcid":"https://orcid.org/0000-0001-7848-1172"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Masahiro Nakao","raw_affiliation_strings":["RIKEN Advanced Institute for Computational Science"],"affiliations":[{"raw_affiliation_string":"RIKEN Advanced Institute for Computational Science","institution_ids":["https://openalex.org/I4210129730"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041051226","display_name":"Akihiro Tabuchi","orcid":"https://orcid.org/0009-0009-3931-3817"},"institutions":[{"id":"https://openalex.org/I146399215","display_name":"University of Tsukuba","ror":"https://ror.org/02956yf07","country_code":"JP","type":"funder","lineage":["https://openalex.org/I146399215"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Akihiro Tabuchi","raw_affiliation_strings":["Tsukuba Daigaku, Tsukuba, Ibaraki, JP"],"affiliations":[{"raw_affiliation_string":"Tsukuba Daigaku, Tsukuba, Ibaraki, JP","institution_ids":["https://openalex.org/I146399215"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040254066","display_name":"Mitsuhisa Sato","orcid":"https://orcid.org/0000-0003-0543-7116"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Mitsuhisa Sato","raw_affiliation_strings":["RIKEN Advanced Institute for Computational Science"],"affiliations":[{"raw_affiliation_string":"RIKEN Advanced Institute for Computational Science","institution_ids":["https://openalex.org/I4210129730"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.176,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":2,"citation_normalized_percentile":{"value":0.31941,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":73,"max":76},"biblio":{"volume":null,"issue":null,"first_page":"627","last_page":"634"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/infiniband","display_name":"InfiniBand","score":0.9917052},{"id":"https://openalex.org/keywords/myrinet","display_name":"Myrinet","score":0.5753294},{"id":"https://openalex.org/keywords/stencil","display_name":"Stencil","score":0.5549703}],"concepts":[{"id":"https://openalex.org/C2781030343","wikidata":"https://www.wikidata.org/wiki/Q922437","display_name":"InfiniBand","level":2,"score":0.9917052},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8703015},{"id":"https://openalex.org/C64270927","wikidata":"https://www.wikidata.org/wiki/Q206924","display_name":"PCI Express","level":3,"score":0.7230527},{"id":"https://openalex.org/C130795937","wikidata":"https://www.wikidata.org/wiki/Q2561570","display_name":"Remote direct memory access","level":2,"score":0.69880176},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.60249543},{"id":"https://openalex.org/C2780601250","wikidata":"https://www.wikidata.org/wiki/Q1863181","display_name":"Myrinet","level":3,"score":0.5753294},{"id":"https://openalex.org/C123745756","wikidata":"https://www.wikidata.org/wiki/Q1665949","display_name":"Interconnection","level":2,"score":0.5735263},{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.5549703},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.51607865},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.5069828},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.5014005},{"id":"https://openalex.org/C60832428","wikidata":"https://www.wikidata.org/wiki/Q945818","display_name":"Partitioned global address space","level":3,"score":0.49374685},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4594688},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.4161016},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3928192},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.38539544},{"id":"https://openalex.org/C854659","wikidata":"https://www.wikidata.org/wiki/Q1859284","display_name":"Message passing","level":2,"score":0.33571935},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3179954},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3088348},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.28291655},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.14135584},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.103396505},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/cluster.2015.112","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":9,"referenced_works":["https://openalex.org/W1671636854","https://openalex.org/W1838011793","https://openalex.org/W1973698973","https://openalex.org/W1984096996","https://openalex.org/W2019782586","https://openalex.org/W2050930161","https://openalex.org/W2145843807","https://openalex.org/W57243869","https://openalex.org/W619618554"],"related_works":["https://openalex.org/W4246679332","https://openalex.org/W2937592512","https://openalex.org/W2148030923","https://openalex.org/W2132218766","https://openalex.org/W2104094072","https://openalex.org/W2007129194","https://openalex.org/W1978254186","https://openalex.org/W1966974913","https://openalex.org/W1934320078","https://openalex.org/W1901618855"],"abstract_inverted_index":{"For":[0,122],"the":[1,27,34,42,74,102,138,145,182,186,190,216,236,246],"execution":[2],"of":[3,77,90,124,224,231,239,251,257],"parallel":[4],"HPC":[5],"applications":[6],"on":[7,22,69,165,189,205],"GPU-ready":[8],"clusters,":[9],"high":[10,247],"communication":[11,28,57,135,210,243],"latency":[12,29,238],"between":[13,30],"GPUs":[14],"over":[15],"nodes":[16,78,91],"will":[17],"be":[18,93],"a":[19,46,80,109,133,152,166,212],"serious":[20],"problem":[21],"strong":[23],"scalability.":[24],"To":[25],"reduce":[26],"GPUs,":[31],"we":[32,150],"proposed":[33],"Tightly":[35],"Coupled":[36],"Accelerator":[37],"(TCA)":[38],"architecture":[39],"and":[40,101,116,159,161,241,249],"developed":[41],"PEACH2":[43,53,158,240],"board":[44],"as":[45,73,99,108],"proof-of-concept":[47],"interconnection":[48,154],"system":[49,81,105,136,156],"for":[50,170,193,222],"TCA.":[51],"Although":[52],"provides":[54],"very":[55],"low":[56,237],"latency,":[58],"there":[59],"are":[60],"some":[61],"hardware":[62],"limitations":[63],"due":[64],"to":[65,130,198,201,220,226],"its":[66],"implementation":[67],"depending":[68],"PCIe":[70],"technology,":[71],"such":[72,98,132],"practical":[75],"number":[76,89],"in":[79,254],"which":[82],"is":[83,106,128],"16":[84,227],"currently":[85],"named":[86,173],"sub-cluster.":[87],"More":[88],"should":[92],"connected":[94],"by":[95,120,196,218,235,245],"conventional":[96,114],"interconnections":[97],"InfiniBand,":[100,160,252],"entire":[103],"network":[104,115,119,155,184,214],"configured":[107],"hybrid":[110,153,183,213],"one":[111],"with":[112,203,211],"global":[113,242],"local":[117,232],"high-speed":[118],"PEACH2.":[121],"ease":[123],"user":[125],"programmability,":[126],"it":[127,163],"desirable":[129],"operate":[131],"complicated":[134],"at":[137],"library":[139],"or":[140],"language":[141,169],"level":[142],"(which":[143],"hides":[144],"system).":[146],"In":[147],"this":[148],"paper,":[149],"develop":[151],"combining":[157],"implement":[162],"based":[164,188],"high-level":[167],"PGAS":[168],"accelerated":[171],"clusters":[172],"XcalableACC":[174],"(XACC).":[175],"A":[176],"preliminary":[177],"performance":[178,187,217],"evaluation":[179],"confirms":[180],"that":[181],"improves":[185,215],"Himeno":[191],"benchmark":[192],"stencil":[194],"computation":[195],"up":[197,219],"40%,":[199],"relative":[200],"MVAPICH2":[202],"GDR":[204],"InfiniBand.":[206],"Additionally,":[207],"Allgather":[208],"collective":[209],"50%":[221],"networks":[223],"8":[225],"nodes.":[228],"The":[229],"combination":[230],"communication,":[233],"supported":[234,244],"bandwidth":[248],"scalability":[250],"results":[253],"an":[255],"improvement":[256],"overall":[258],"performance.":[259]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W1934320078","counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2025-03-18T17:45:27.218968","created_date":"2016-06-24"}