{"id":"https://openalex.org/W4388581117","doi":"https://doi.org/10.1145/3624062.3624251","title":"Optimization of Ported CFD Kernels on Intel Data Center GPU Max 1550 using oneAPI ESIMD","display_name":"Optimization of Ported CFD Kernels on Intel Data Center GPU Max 1550 using oneAPI ESIMD","publication_year":2023,"publication_date":"2023-11-10","ids":{"openalex":"https://openalex.org/W4388581117","doi":"https://doi.org/10.1145/3624062.3624251"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3624062.3624251","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3624062.3624251","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3624062.3624251","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101789500","display_name":"Mohammad Zubair","orcid":"https://orcid.org/0000-0002-5449-1779"},"institutions":[{"id":"https://openalex.org/I81365321","display_name":"Old Dominion University","ror":"https://ror.org/04zjtrb98","country_code":"US","type":"education","lineage":["https://openalex.org/I81365321"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mohammad Zubair","raw_affiliation_strings":["Old Dominion University, United States of America"],"affiliations":[{"raw_affiliation_string":"Old Dominion University, United States of America","institution_ids":["https://openalex.org/I81365321"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068101094","display_name":"Aaron Walden","orcid":"https://orcid.org/0009-0004-7524-6715"},"institutions":[{"id":"https://openalex.org/I1319063186","display_name":"Langley Research Center","ror":"https://ror.org/0399mhs52","country_code":"US","type":"facility","lineage":["https://openalex.org/I1319063186","https://openalex.org/I4210124779"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aaron Walden","raw_affiliation_strings":["NASA Langley Research Center, United States of America"],"affiliations":[{"raw_affiliation_string":"NASA Langley Research Center, United States of America","institution_ids":["https://openalex.org/I1319063186"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071286768","display_name":"Gabriel Nastac","orcid":"https://orcid.org/0009-0001-9672-5393"},"institutions":[{"id":"https://openalex.org/I1319063186","display_name":"Langley Research Center","ror":"https://ror.org/0399mhs52","country_code":"US","type":"facility","lineage":["https://openalex.org/I1319063186","https://openalex.org/I4210124779"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gabriel Nastac","raw_affiliation_strings":["NASA Langley Research Center, United States of America"],"affiliations":[{"raw_affiliation_string":"NASA Langley Research Center, United States of America","institution_ids":["https://openalex.org/I1319063186"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102899743","display_name":"Eric J. Nielsen","orcid":"https://orcid.org/0009-0002-5005-5512"},"institutions":[{"id":"https://openalex.org/I1319063186","display_name":"Langley Research Center","ror":"https://ror.org/0399mhs52","country_code":"US","type":"facility","lineage":["https://openalex.org/I1319063186","https://openalex.org/I4210124779"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eric Nielsen","raw_affiliation_strings":["NASA Langley Research Center, United States of America"],"affiliations":[{"raw_affiliation_string":"NASA Langley Research Center, United States of America","institution_ids":["https://openalex.org/I1319063186"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051409453","display_name":"Christoph Bauinger","orcid":"https://orcid.org/0009-0007-0096-2321"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christoph Bauinger","raw_affiliation_strings":["Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073515895","display_name":"Xiao Zhu","orcid":"https://orcid.org/0000-0003-4946-2081"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiao Zhu","raw_affiliation_strings":["Intel Corporation, United States of America"],"affiliations":[{"raw_affiliation_string":"Intel Corporation, United States of America","institution_ids":["https://openalex.org/I1343180700"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.465,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":2,"citation_normalized_percentile":{"value":0.622835,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":78,"max":84},"biblio":{"volume":null,"issue":null,"first_page":"1705","last_page":"1712"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9989,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.997,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/porting","display_name":"Porting","score":0.8064693},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.6532373},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.53056073},{"id":"https://openalex.org/keywords/intel-ipsc","display_name":"Intel iPSC","score":0.49776438},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.449256}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8197578},{"id":"https://openalex.org/C106251023","wikidata":"https://www.wikidata.org/wiki/Q851989","display_name":"Porting","level":3,"score":0.8064693},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.79107463},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.6933845},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.6532373},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.53056073},{"id":"https://openalex.org/C199115840","wikidata":"https://www.wikidata.org/wiki/Q108649754","display_name":"Intel iPSC","level":3,"score":0.49776438},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.45764205},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.449256},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.41128173},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.1240499},{"id":"https://openalex.org/C120373497","wikidata":"https://www.wikidata.org/wiki/Q1087987","display_name":"Parallel algorithm","level":2,"score":0.11448082},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.113046855},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3624062.3624251","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3624062.3624251","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3624062.3624251","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3624062.3624251","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":15,"referenced_works":["https://openalex.org/W1977294541","https://openalex.org/W2036020138","https://openalex.org/W2112548197","https://openalex.org/W2315522472","https://openalex.org/W2926135451","https://openalex.org/W3035274195","https://openalex.org/W3047627975","https://openalex.org/W3118820529","https://openalex.org/W3150433222","https://openalex.org/W4205756563","https://openalex.org/W4230678748","https://openalex.org/W4245037559","https://openalex.org/W4317568537","https://openalex.org/W4317584151","https://openalex.org/W4379876606"],"related_works":["https://openalex.org/W3177128669","https://openalex.org/W2991848348","https://openalex.org/W2966127030","https://openalex.org/W2621501241","https://openalex.org/W2293957988","https://openalex.org/W2232473477","https://openalex.org/W2208757713","https://openalex.org/W2146636354","https://openalex.org/W2057137496","https://openalex.org/W1824519287"],"abstract_inverted_index":{"We":[0,12,49,113,140],"describe":[1],"our":[2],"experience":[3],"porting":[4],"FUN3D's":[5],"CUDA-optimized":[6,132],"kernels":[7,56,121],"to":[8,40,92,104,160],"Intel":[9,58,124,150],"oneAPI":[10,23,35],"SYCL.":[11,93],"faced":[13],"several":[14],"challenges,":[15],"including":[16],"foremost":[17],"the":[18,22,34,55,70,98,111,116,123,131,142,149],"suboptimal":[19],"performance":[20,32,117,143],"of":[21,33,72,107,118,144,148],"code":[24,36,96,101],"on":[25,110,122,134],"Intel's":[26],"new":[27],"data":[28],"center":[29],"GPU.":[30],"Suboptimal":[31],"was":[37],"due":[38],"primarily":[39],"high":[41],"register":[42,82],"spills,":[43],"memory":[44],"latency,":[45],"and":[46,84,86,137,158],"poor":[47],"vectorization.":[48],"addressed":[50],"these":[51],"issues":[52],"by":[53,102],"implementing":[54],"using":[57,152],"oneAPI's":[59],"Explicit":[60],"SIMD":[61],"SYCL":[62,100],"extension":[63],"(ESIMD)":[64],"API.":[65],"The":[66,94],"ESIMD":[67,95,120,153],"API":[68],"enables":[69],"writing":[71],"explicitly":[73],"vectorized":[74],"kernel":[75],"code,":[76],"gives":[77],"more":[78],"precise":[79],"control":[80],"over":[81],"usage":[83],"prefetching,":[85],"better":[87],"handles":[88],"thread":[89],"divergence":[90],"compared":[91,115],"outperforms":[97],"optimized":[99],"up":[103],"a":[105,145],"factor":[106],"3.6,":[108],"depending":[109],"kernel.":[112],"also":[114],"three":[119],"Data":[125],"Center":[126],"Max":[127],"1550":[128],"GPU":[129,151],"with":[130],"versions":[133],"NVIDIA":[135,156,161],"V100":[136,157],"A100":[138],"GPUs.":[139],"found":[141],"single":[146],"tile":[147],"greater":[154],"than":[155],"similar":[159],"A100.":[162]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4388581117","counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2025-01-04T03:27:15.304441","created_date":"2023-11-11"}