{"id":"https://openalex.org/W2994869848","doi":"https://doi.org/10.1145/3368474.3368480","title":"Performance Improvement of a Scalable High-Order Compressible Flow Solver on Unstructured Hexahedral Grids","display_name":"Performance Improvement of a Scalable High-Order Compressible Flow Solver on Unstructured Hexahedral Grids","publication_year":2019,"publication_date":"2019-12-17","ids":{"openalex":"https://openalex.org/W2994869848","doi":"https://doi.org/10.1145/3368474.3368480","mag":"2994869848"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3368474.3368480","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110761481","display_name":"Kazuma Tago","orcid":null},"institutions":[{"id":"https://openalex.org/I2800865746","display_name":"Japan Aerospace Exploration Agency","ror":"https://ror.org/059yhyy33","country_code":"JP","type":"facility","lineage":["https://openalex.org/I2800865746"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kazuma Tago","raw_affiliation_strings":["Japan Aerospace Exploration Agency, Kanagawa, Japan"],"affiliations":[{"raw_affiliation_string":"Japan Aerospace Exploration Agency, Kanagawa, Japan","institution_ids":["https://openalex.org/I2800865746"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082228106","display_name":"T. Haga","orcid":"https://orcid.org/0000-0002-4374-2034"},"institutions":[{"id":"https://openalex.org/I2800865746","display_name":"Japan Aerospace Exploration Agency","ror":"https://ror.org/059yhyy33","country_code":"JP","type":"facility","lineage":["https://openalex.org/I2800865746"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takanori Haga","raw_affiliation_strings":["Japan Aerospace Exploration Agency, Kanagawa, Japan"],"affiliations":[{"raw_affiliation_string":"Japan Aerospace Exploration Agency, Kanagawa, Japan","institution_ids":["https://openalex.org/I2800865746"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087059080","display_name":"Seiji Tsutsumi","orcid":null},"institutions":[{"id":"https://openalex.org/I2800865746","display_name":"Japan Aerospace Exploration Agency","ror":"https://ror.org/059yhyy33","country_code":"JP","type":"facility","lineage":["https://openalex.org/I2800865746"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Seiji Tsutsumi","raw_affiliation_strings":["Japan Aerospace Exploration Agency, Kanagawa, Japan"],"affiliations":[{"raw_affiliation_string":"Japan Aerospace Exploration Agency, Kanagawa, Japan","institution_ids":["https://openalex.org/I2800865746"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000091949","display_name":"Ryoji Takaki","orcid":null},"institutions":[{"id":"https://openalex.org/I2800865746","display_name":"Japan Aerospace Exploration Agency","ror":"https://ror.org/059yhyy33","country_code":"JP","type":"facility","lineage":["https://openalex.org/I2800865746"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Ryoji Takaki","raw_affiliation_strings":["Japan Aerospace Exploration Agency, Kanagawa, Japan"],"affiliations":[{"raw_affiliation_string":"Japan Aerospace Exploration Agency, Kanagawa, Japan","institution_ids":["https://openalex.org/I2800865746"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":61},"biblio":{"volume":null,"issue":null,"first_page":"227","last_page":"236"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10173","display_name":"Computational Fluid Dynamics and Aerodynamics","score":0.9984,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10173","display_name":"Computational Fluid Dynamics and Aerodynamics","score":0.9984,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10339","display_name":"Advanced Numerical Methods in Computational Mathematics","score":0.9971,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9963,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6524378},{"id":"https://openalex.org/keywords/stencil","display_name":"Stencil","score":0.629773},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.5152331}],"concepts":[{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.832584},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7640949},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6524378},{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.629773},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.53372735},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.5152331},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.49708274},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.4935764},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.44181907},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.39709812},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3368474.3368480","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":10,"referenced_works":["https://openalex.org/W1971949097","https://openalex.org/W2148897203","https://openalex.org/W2161938981","https://openalex.org/W2273809747","https://openalex.org/W2332227794","https://openalex.org/W2564856016","https://openalex.org/W2620792850","https://openalex.org/W2676592635","https://openalex.org/W2762892646","https://openalex.org/W2895082611"],"related_works":["https://openalex.org/W4316371992","https://openalex.org/W3105129168","https://openalex.org/W2804920739","https://openalex.org/W2392765154","https://openalex.org/W2355089277","https://openalex.org/W233983175","https://openalex.org/W2312486021","https://openalex.org/W2186216222","https://openalex.org/W2164382479","https://openalex.org/W2058965144"],"abstract_inverted_index":{"This":[0,48],"paper":[1],"describes":[2],"LS-FLOW-HO,":[3,169],"a":[4,59,83,192],"high-order":[5,25],"compressible":[6],"flow":[7],"solver":[8],"based":[9],"on":[10,27,51,191,197,221],"the":[11,39,52,56,64,72,96,110,141,145,149,154,182,204,216],"Flux":[12,20],"Reconstruction(FR)":[13],"method,":[14],"and":[15,30,158,178],"its":[16],"performance":[17,53,196,228],"optimization.":[18],"The":[19,135,195],"Reconstruction":[21],"method":[22],"achieves":[23],"arbitrary":[24],"accuracy":[26],"unstructured":[28],"grids":[29],"is":[31,75,93,115,207,235],"suitable":[32],"for":[33,106,229],"many":[34],"core":[35],"architectures":[36],"because":[37],"of":[38,55,67,79,112,181,189],"local":[40],"data":[41],"sets":[42],"(Stencil)":[43],"involved":[44],"in":[45,144],"spatial":[46],"discretization.":[47],"study":[49],"focuses":[50],"optimization":[54],"PRIMEHPC":[57],"FX100,":[58],"Fujitsu":[60],"scalar":[61],"supercomputer.":[62],"First,":[63],"execution":[65,170,205],"time":[66,102,171,206],"sample":[68],"code":[69,80,132],"that":[70,78,81,95,215],"uses":[71,82],"BLAS":[73],"library":[74],"compared":[76],"with":[77],"sparse":[84,97,120],"matrix":[85,98,121],"multiplication":[86,99],"which":[87],"calculates":[88],"only":[89],"non-zero":[90],"values.":[91],"It":[92,212],"found":[94],"takes":[100],"less":[101],"than":[103,117],"using":[104,186,237],"DGEMM":[105],"hexahedral":[107],"elements":[108],"when":[109],"degree":[111],"interpolation":[113],"polynomial":[114],"higher":[116],"2.":[118],"Using":[119],"multiplication,":[122],"hot":[123],"spot":[124],"tuning":[125],"was":[126,137,172,200,213],"done":[127],"by":[128,139,153,162,174,209],"extracting":[129],"each":[130],"subroutine":[131],"from":[133],"LS-FLOW-HO.":[134],"speedup":[136],"confirmed":[138,214],"changing":[140],"array":[142],"structure":[143],"cell":[146],"boundary,":[147],"improving":[148],"memory/cache":[150],"access":[151],"latency":[152],"sequential":[155],"memory":[156],"access,":[157],"increasing":[159],"loop":[160,163],"length":[161],"collapsing.":[164],"Applying":[165],"these":[166],"tunings":[167],"to":[168,176,232,239,243],"reduced":[173,208],"up":[175],"40%,":[177],"reached":[179],"10.23%":[180],"theoretical":[183],"FLOPS":[184],"peak":[185],"16":[187],"threads":[188],"OpenMP":[190],"single":[193],"node.":[194],"Intel":[198],"Haswell":[199],"also":[201],"shown":[202],"as":[203],"about":[210],"49%.":[211],"proposed":[217],"techniques":[218],"are":[219],"effective":[220],"other":[222],"processors.":[223],"Finally,":[224],"sustained":[225],"strong":[226],"scaling":[227],"real":[230],"application":[231],"supersonic":[233],"jets":[234],"demonstrated":[236],"32":[238],"3200":[240],"nodes":[241],"(1024":[242],"102400":[244],"cores).":[245]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2994869848","counts_by_year":[],"updated_date":"2024-12-09T04:27:54.977146","created_date":"2019-12-26"}