{"id":"https://openalex.org/W4200390600","doi":"https://doi.org/10.1109/ia354616.2021.00010","title":"Accelerating unstructured-grid CFD algorithms on NVIDIA and AMD GPUs","display_name":"Accelerating unstructured-grid CFD algorithms on NVIDIA and AMD GPUs","publication_year":2021,"publication_date":"2021-11-01","ids":{"openalex":"https://openalex.org/W4200390600","doi":"https://doi.org/10.1109/ia354616.2021.00010"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/ia354616.2021.00010","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028437825","display_name":"Christopher Stone","orcid":"https://orcid.org/0000-0002-9621-5334"},"institutions":[{"id":"https://openalex.org/I1329048335","display_name":"National Institute of Aerospace","ror":"https://ror.org/00fadxs59","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1329048335"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher P. Stone","raw_affiliation_strings":["National Institute of Aerospace, Hampton, VA, USA"],"affiliations":[{"raw_affiliation_string":"National Institute of Aerospace, Hampton, VA, USA","institution_ids":["https://openalex.org/I1329048335"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068101094","display_name":"Aaron Walden","orcid":"https://orcid.org/0009-0004-7524-6715"},"institutions":[{"id":"https://openalex.org/I1319063186","display_name":"Langley Research Center","ror":"https://ror.org/0399mhs52","country_code":"US","type":"funder","lineage":["https://openalex.org/I1319063186","https://openalex.org/I4210124779"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aaron Walden","raw_affiliation_strings":["Langley Research Center, NASA, Hampton, VA, USA"],"affiliations":[{"raw_affiliation_string":"Langley Research Center, NASA, Hampton, VA, USA","institution_ids":["https://openalex.org/I1319063186"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101789500","display_name":"Mohammad Zubair","orcid":"https://orcid.org/0000-0002-5449-1779"},"institutions":[{"id":"https://openalex.org/I81365321","display_name":"Old Dominion University","ror":"https://ror.org/04zjtrb98","country_code":"US","type":"funder","lineage":["https://openalex.org/I81365321"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mohammad Zubair","raw_affiliation_strings":["Old Dominion U., Norfolk, VA, USA"],"affiliations":[{"raw_affiliation_string":"Old Dominion U., Norfolk, VA, USA","institution_ids":["https://openalex.org/I81365321"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102899743","display_name":"Eric J. Nielsen","orcid":"https://orcid.org/0009-0002-5005-5512"},"institutions":[{"id":"https://openalex.org/I1319063186","display_name":"Langley Research Center","ror":"https://ror.org/0399mhs52","country_code":"US","type":"funder","lineage":["https://openalex.org/I1319063186","https://openalex.org/I4210124779"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eric J. Nielsen","raw_affiliation_strings":["Langley Research Center, NASA, Hampton, VA, USA"],"affiliations":[{"raw_affiliation_string":"Langley Research Center, NASA, Hampton, VA, USA","institution_ids":["https://openalex.org/I1319063186"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.313,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.900978,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":87,"max":88},"biblio":{"volume":null,"issue":null,"first_page":"19","last_page":"26"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9986,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transpose","display_name":"Transpose","score":0.47149107},{"id":"https://openalex.org/keywords/gpu-cluster","display_name":"GPU cluster","score":0.44757017}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8693066},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.74212414},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.5955546},{"id":"https://openalex.org/C200106649","wikidata":"https://www.wikidata.org/wiki/Q223683","display_name":"Transpose","level":3,"score":0.47149107},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.45812237},{"id":"https://openalex.org/C2781335571","wikidata":"https://www.wikidata.org/wiki/Q2633544","display_name":"GPU cluster","level":3,"score":0.44757017},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.44492155},{"id":"https://openalex.org/C1633027","wikidata":"https://www.wikidata.org/wiki/Q815820","display_name":"Computational fluid dynamics","level":2,"score":0.41623646},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.38163924},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C57879066","wikidata":"https://www.wikidata.org/wiki/Q41217","display_name":"Mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/ia354616.2021.00010","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.71,"display_name":"Affordable and clean energy"}],"grants":[{"funder":"https://openalex.org/F4320316895","funder_display_name":"Aeronautics Research Mission Directorate","award_id":"NNL09AA00A"},{"funder":"https://openalex.org/F4320332378","funder_display_name":"Langley Research Center","award_id":"NNL09AA00A"}],"datasets":[],"versions":[],"referenced_works_count":6,"referenced_works":["https://openalex.org/W2095420020","https://openalex.org/W2998512843","https://openalex.org/W3118820529","https://openalex.org/W3154225063","https://openalex.org/W3188799945","https://openalex.org/W4236919333"],"related_works":["https://openalex.org/W3189307731","https://openalex.org/W2949962288","https://openalex.org/W2364686214","https://openalex.org/W2085873709","https://openalex.org/W2056717482","https://openalex.org/W2017587301","https://openalex.org/W1998560227","https://openalex.org/W1996803181","https://openalex.org/W1582436825","https://openalex.org/W1428699136"],"abstract_inverted_index":{"Computational":[0],"performance":[1,156,210],"of":[2,19,49,73,125,135,157],"the":[3,17,26,68,83,119,123,129,149,155,158,168,183,193,204,209,214],"FUN3D":[4],"unstructured-grid":[5],"computational":[6],"fluid":[7],"dynamics":[8],"(CFD)":[9],"application":[10],"on":[11,16,59,67,128,148,182,196],"GPUs":[12,151,191],"is":[13],"highly":[14],"dependent":[15],"efficiency":[18,48],"floating-point":[20],"atomic":[21,56,201],"updates":[22],"needed":[23],"to":[24,45,97,117,176],"support":[25],"irregular":[27],"cell-,":[28],"edge-,":[29],"and":[30,91,138,189,212],"node-based":[31],"data":[32],"access":[33],"patterns":[34],"in":[35,82],"massively":[36],"parallel":[37],"GPU":[38,47,71,104,131,195],"environments.":[39],"We":[40],"examine":[41],"several":[42],"optimization":[43],"methods":[44],"improve":[46],"performance-critical":[50],"kernels":[51,127,197],"that":[52],"are":[53],"dominated":[54,198],"by":[55,132,199],"update":[57,120],"costs":[58],"NVIDIA":[60,150],"V100/A100and":[61],"AMD":[62,69],"CDNA":[63],"MI100":[64,70,130,184,194,215],"GPUs.":[65],"Optimization":[66],"was":[72,152],"primary":[74],"interest":[75],"since":[76],"similar":[77],"hardware":[78],"will":[79],"be":[80],"used":[81,96],"upcoming":[84],"Frontier":[85],"supercomputer.":[86],"Techniques":[87],"combining":[88],"register":[89],"shuffling":[90],"on-chip":[92],"shared":[93],"memory":[94,144],"were":[95],"transpose":[98],"and/or":[99],"aggregate":[100],"results":[101],"amongst":[102],"collaborating":[103],"threads":[105],"before":[106],"atomically":[107,141],"updating":[108,142],"global":[109,143],"memory.":[110],"These":[111],"techniques,":[112],"along":[113],"with":[114,154],"algorithmic":[115],"optimizations":[116],"reduce":[118],"frequency,":[121],"reduced":[122,208],"run-time":[124],"select":[126],"a":[133,177],"factor":[134],"between":[136],"2.5":[137],"6.0":[139],"over":[140],"directly.":[145],"Performance":[146],"impact":[147],"mixed":[153],"V100":[159,188],"often":[160],"degraded":[161],"when":[162],"using":[163],"register-based":[164],"aggregation/transposition":[165],"techniques":[166,205],"while":[167],"A100":[169,190],"generally":[170],"benefited":[171],"from":[172],"these":[173],"methods,":[174],"though":[175],"lesser":[178],"extent":[179],"than":[180],"measured":[181],"GPU.":[185],"Overall,":[186],"both":[187],"outperformed":[192],"double-precision":[200],"updates;":[202],"however,":[203],"demonstrated":[206],"here":[207],"gap":[211],"improved":[213],"performance.":[216]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4200390600","counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":4}],"updated_date":"2025-04-18T08:30:08.213154","created_date":"2021-12-31"}