{"id":"https://openalex.org/W4403882505","doi":"https://doi.org/10.48550/arxiv.2410.02170","title":"Extracting the Potential of Emerging Hardware Accelerators for Symmetric\n Eigenvalue Decomposition","display_name":"Extracting the Potential of Emerging Hardware Accelerators for Symmetric\n Eigenvalue Decomposition","publication_year":2024,"publication_date":"2024-10-02","ids":{"openalex":"https://openalex.org/W4403882505","doi":"https://doi.org/10.48550/arxiv.2410.02170"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.02170","pdf_url":"http://arxiv.org/pdf/2410.02170","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2410.02170","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020870842","display_name":"Hansheng Wang","orcid":"https://orcid.org/0000-0003-2992-5595"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Hansheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102774058","display_name":"Lu Shi","orcid":"https://orcid.org/0000-0002-7015-385X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Lu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019102524","display_name":"Zhong-Hui Duan","orcid":"https://orcid.org/0000-0001-6561-0991"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"duan, Zhekai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020822198","display_name":"Panruo Wu","orcid":"https://orcid.org/0000-0003-1859-3580"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Panruo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101553480","display_name":"Liwei Guo","orcid":"https://orcid.org/0000-0002-6831-8360"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Liwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5091290657","display_name":"Shaoshuai Zhang","orcid":"https://orcid.org/0000-0002-9525-1659"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Shaoshuai","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":78},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9186,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9186,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11236","display_name":"Control Systems and Identification","score":0.9058,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.6516892},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.6275269},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5116837},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.38075784},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3289634},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.23620638},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.14213327},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.088799804},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.02170","pdf_url":"http://arxiv.org/pdf/2410.02170","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.02170","pdf_url":"http://arxiv.org/pdf/2410.02170","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4396701345","https://openalex.org/W4396696052","https://openalex.org/W4391913857","https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Benefiting":[0],"from":[1,63],"the":[2,22,45,100,108,116,135,144,151,166,179,200,202],"advancement":[3],"of":[4,25,110,143,175,204],"hardware":[5,27,96,112,153],"accelerators":[6,28,113],"such":[7],"as":[8,57],"GPUs,":[9,197],"deep":[10,81],"neural":[11],"networks":[12],"and":[13,49,84,138,148,188,194],"scientific":[14,79],"computing":[15,23,48,97,146],"applications":[16],"can":[17],"achieve":[18],"superior":[19],"performance.":[20],"Recently,":[21],"capacity":[24,147],"emerging":[26,111,152],"has":[29,35],"increased":[30],"rapidly,":[31],"while":[32],"memory":[33],"bandwidth":[34],"not":[36],"kept":[37],"pace":[38],"with":[39],"this":[40,104],"growth.":[41],"This":[42],"disparity":[43],"exacerbates":[44],"gap":[46],"between":[47],"memory,":[50],"leading":[51],"to":[52,60,65,91,114,178,185,210],"inefficiencies":[53],"on":[54,99,150,162,191],"conventional":[55,120],"algorithms,":[56,86],"they're":[58],"likely":[59],"be":[61],"converted":[62],"compute-bound":[64],"memory-bound.":[66],"Symmetric":[67],"eigenvalue":[68],"decomposition":[69],"(EVD),":[70],"a":[71,140],"critical":[72],"operation":[73],"in":[74,119],"various":[75],"research":[76],"domains":[77],"including":[78],"computing,":[80],"learning":[82],"training,":[83],"inference":[85],"exhibits":[87],"suboptimal":[88],"performance":[89,203],"due":[90],"achieving":[92,183],"less":[93],"than":[94,213],"3\\%":[95],"utilization":[98,142],"H100":[101],"GPU.":[102],"In":[103],"paper,":[105],"we":[106,127],"analyze":[107],"features":[109],"identify":[115],"bottlenecks":[117],"inherent":[118],"EVD":[121,125,205],"algorithms.":[122],"To":[123],"improve":[124],"performance,":[126],"propose":[128],"several":[129],"algorithmic":[130],"optimizations":[131],"aimed":[132],"at":[133],"solving":[134],"memory-bound":[136],"problem":[137],"providing":[139],"better":[141],"rich":[145],"parallelism":[149],"accelerators.":[154],"Experimentally,":[155],"our":[156],"proposed":[157],"method":[158],"demonstrates":[159],"significant":[160],"speedups":[161],"tridiagonalization,":[163,182],"which":[164],"is":[165,207],"main":[167],"workload":[168],"that":[169],"takes":[170],"over":[171],"90\\%":[172],"elapsed":[173],"time":[174],"EVD,":[176],"compared":[177],"SOTA":[180],"cuSOLVER":[181],"up":[184,209],"10.1x,":[186],"7.5x,":[187],"2.3x":[189],"improvements":[190],"H100,":[192],"A100,":[193],"RTX":[195],"4090":[196],"respectively.":[198],"And":[199],"end-to-end":[201],"solver":[206],"also":[208],"4.1x":[211],"faster":[212],"cuSOVLER.":[214]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403882505","counts_by_year":[],"updated_date":"2025-04-10T16:49:04.046188","created_date":"2024-10-30"}