{"id":"https://openalex.org/W2157585189","doi":"https://doi.org/10.1109/ipdps.2011.289","title":"Comprehensive Performance Monitoring for GPU Cluster Systems","display_name":"Comprehensive Performance Monitoring for GPU Cluster Systems","publication_year":2011,"publication_date":"2011-05-01","ids":{"openalex":"https://openalex.org/W2157585189","doi":"https://doi.org/10.1109/ipdps.2011.289","mag":"2157585189"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2011.289","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043839305","display_name":"Karl F\u00fcrlinger","orcid":"https://orcid.org/0000-0003-0398-4087"},"institutions":[{"id":"https://openalex.org/I8204097","display_name":"Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen","ror":"https://ror.org/05591te55","country_code":"DE","type":"education","lineage":["https://openalex.org/I8204097"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Karl Furlinger","raw_affiliation_strings":["Computer Science Department, MNM Team, Ludwig-Maximilians Universit\u00e4t Munich, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, MNM Team, Ludwig-Maximilians Universit\u00e4t Munich, Munich, Germany","institution_ids":["https://openalex.org/I8204097"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070066590","display_name":"Nicholas J. Wright","orcid":"https://orcid.org/0000-0003-1883-6108"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nicholas J. Wright","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5012176604","display_name":"David Skinner","orcid":"https://orcid.org/0000-0002-8169-3958"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Skinner","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.871,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":7,"citation_normalized_percentile":{"value":0.541385,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":82,"max":83},"biblio":{"volume":null,"issue":null,"first_page":"1377","last_page":"1386"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9995,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.7079378},{"id":"https://openalex.org/keywords/gpu-cluster","display_name":"GPU cluster","score":0.59723103}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8749516},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.8180467},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.7757311},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.7079378},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.6081389},{"id":"https://openalex.org/C2781335571","wikidata":"https://www.wikidata.org/wiki/Q2633544","display_name":"GPU cluster","level":3,"score":0.59723103},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.5864597},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5068858},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.44439146},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4321165},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.42754868},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.42606902},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.39330378},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.23858935},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.21154183},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.0890204},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2011.289","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":10,"referenced_works":["https://openalex.org/W1512862109","https://openalex.org/W1533854156","https://openalex.org/W1555873941","https://openalex.org/W1965351873","https://openalex.org/W1969399184","https://openalex.org/W2016279572","https://openalex.org/W2098426571","https://openalex.org/W2136895839","https://openalex.org/W2170611190","https://openalex.org/W2395771004"],"related_works":["https://openalex.org/W3048701459","https://openalex.org/W2546223573","https://openalex.org/W240129890","https://openalex.org/W2389600408","https://openalex.org/W2370314112","https://openalex.org/W2364044215","https://openalex.org/W2149078538","https://openalex.org/W2080146221","https://openalex.org/W1963859303","https://openalex.org/W1912958759"],"abstract_inverted_index":{"Accelerating":[0],"applications":[1],"with":[2,42],"GPUs":[3],"has":[4],"recently":[5],"garnered":[6],"a":[7,27,117],"lot":[8],"of":[9,29,35,56,74,82,110],"interest":[10],"from":[11],"the":[12,32,36,53,57,75,80,105],"scientific":[13,119],"computing":[14],"community.":[15],"While":[16],"tools":[17],"for":[18,31,93,100],"optimizing":[19],"individual":[20],"kernels":[21],"are":[22,59],"readily":[23],"available,":[24],"there":[25],"is":[26],"lack":[28],"support":[30],"specific":[33],"needs":[34],"HPC":[37],"area.":[38],"Most":[39],"importantly,":[40],"integration":[41],"existing":[43],"parallel":[44],"programming":[45],"models":[46],"(MPI":[47],"and":[48,50,71,85,96,108,113],"threading)":[49],"scalability":[51],"to":[52,116],"full":[54,118],"size":[55],"machine":[58],"required.":[60],"To":[61],"address":[62],"these":[63],"issues":[64],"we":[65],"present":[66],"our":[67,83,111],"work":[68],"on":[69],"monitoring":[70,106],"performance":[72],"evaluation":[73],"CUDA":[76],"runtime":[77],"environment":[78],"in":[79],"context":[81],"scalable":[84],"efficient":[86],"profiling":[87],"tool":[88],"IPM.":[89],"We":[90,103],"derive":[91],"metrics":[92],"GPU":[94],"utilization":[95],"identify":[97],"missed":[98],"opportunities":[99],"GPU-CPU":[101],"overlap.":[102],"evaluate":[104],"accuracy":[107],"overheads":[109],"approach":[112],"apply":[114],"it":[115],"application.":[120]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2157585189","counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2012,"cited_by_count":4}],"updated_date":"2024-12-06T22:30:33.457422","created_date":"2016-06-24"}