{"id":"https://openalex.org/W2979041789","doi":"https://doi.org/10.1109/tpds.2019.2944602","title":"cCUDA: Effective Co-Scheduling of Concurrent Kernels on GPUs","display_name":"cCUDA: Effective Co-Scheduling of Concurrent Kernels on GPUs","publication_year":2019,"publication_date":"2019-09-30","ids":{"openalex":"https://openalex.org/W2979041789","doi":"https://doi.org/10.1109/tpds.2019.2944602","mag":"2979041789"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2019.2944602","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090107227","display_name":"S. Kazem Shekofteh","orcid":"https://orcid.org/0000-0002-8783-6243"},"institutions":[{"id":"https://openalex.org/I86958956","display_name":"Ferdowsi University of Mashhad","ror":"https://ror.org/00g6ka752","country_code":"IR","type":"education","lineage":["https://openalex.org/I86958956"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"S.-Kazem Shekofteh","raw_affiliation_strings":["Department of Computer Engineering, Ferdowsi University of Mashhad, Mashhad, Iran"],"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Ferdowsi University of Mashhad, Mashhad, Iran","institution_ids":["https://openalex.org/I86958956"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049093033","display_name":"Hamid Noori","orcid":"https://orcid.org/0000-0003-1410-6781"},"institutions":[{"id":"https://openalex.org/I86958956","display_name":"Ferdowsi University of Mashhad","ror":"https://ror.org/00g6ka752","country_code":"IR","type":"education","lineage":["https://openalex.org/I86958956"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Hamid Noori","raw_affiliation_strings":["Department of Computer Engineering, Ferdowsi University of Mashhad, Mashhad, Iran"],"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Ferdowsi University of Mashhad, Mashhad, Iran","institution_ids":["https://openalex.org/I86958956"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000670364","display_name":"Mahmoud Naghibzadeh","orcid":null},"institutions":[{"id":"https://openalex.org/I86958956","display_name":"Ferdowsi University of Mashhad","ror":"https://ror.org/00g6ka752","country_code":"IR","type":"education","lineage":["https://openalex.org/I86958956"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Mahmoud Naghibzadeh","raw_affiliation_strings":["Department of Computer Engineering, Ferdowsi University of Mashhad, Mashhad, Iran"],"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Ferdowsi University of Mashhad, Mashhad, Iran","institution_ids":["https://openalex.org/I86958956"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014251134","display_name":"Holger Fr\u00f6ning","orcid":"https://orcid.org/0000-0001-9562-0680"},"institutions":[{"id":"https://openalex.org/I223822909","display_name":"Heidelberg University","ror":"https://ror.org/038t36y30","country_code":"DE","type":"education","lineage":["https://openalex.org/I223822909"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Holger Froning","raw_affiliation_strings":["Institute of Computer Engineering, Ruprecht-Karls University of Heidelberg, Heidelberg, Germany"],"affiliations":[{"raw_affiliation_string":"Institute of Computer Engineering, Ruprecht-Karls University of Heidelberg, Heidelberg, Germany","institution_ids":["https://openalex.org/I223822909"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064885254","display_name":"Hadi Sadoghi Yazdi","orcid":"https://orcid.org/0000-0002-6885-4956"},"institutions":[{"id":"https://openalex.org/I86958956","display_name":"Ferdowsi University of Mashhad","ror":"https://ror.org/00g6ka752","country_code":"IR","type":"education","lineage":["https://openalex.org/I86958956"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Hadi Sadoghi Yazdi","raw_affiliation_strings":["Department of Computer Engineering, Ferdowsi University of Mashhad, Mashhad, Iran"],"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Ferdowsi University of Mashhad, Mashhad, Iran","institution_ids":["https://openalex.org/I86958956"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.772,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.815408,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":91},"biblio":{"volume":"31","issue":"4","first_page":"766","last_page":"778"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9979,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9971,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6296748},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.61579204}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.892357},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.73065525},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6296748},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.61579204},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.576481},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5508075},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.48514232},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.45652026},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.33246997},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.24716741},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.18828693},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2019.2944602","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":38,"referenced_works":["https://openalex.org/W1968391520","https://openalex.org/W1984222112","https://openalex.org/W2002555321","https://openalex.org/W2005799478","https://openalex.org/W2036741712","https://openalex.org/W2043083835","https://openalex.org/W2045271686","https://openalex.org/W2048558763","https://openalex.org/W2050127041","https://openalex.org/W2063186542","https://openalex.org/W2078994750","https://openalex.org/W2098274770","https://openalex.org/W2140348470","https://openalex.org/W2152517358","https://openalex.org/W2266773695","https://openalex.org/W2323693848","https://openalex.org/W2369964718","https://openalex.org/W2510439693","https://openalex.org/W2510980549","https://openalex.org/W2557102731","https://openalex.org/W2563567643","https://openalex.org/W2581065617","https://openalex.org/W2605178034","https://openalex.org/W2607112060","https://openalex.org/W2608738900","https://openalex.org/W2618092901","https://openalex.org/W2620346917","https://openalex.org/W2730499906","https://openalex.org/W2758151830","https://openalex.org/W2767882261","https://openalex.org/W277510608","https://openalex.org/W2791966002","https://openalex.org/W2794729807","https://openalex.org/W2910063209","https://openalex.org/W2964330525","https://openalex.org/W3141650078","https://openalex.org/W4235295270","https://openalex.org/W4301891805"],"related_works":["https://openalex.org/W98480971","https://openalex.org/W2597809628","https://openalex.org/W2164382479","https://openalex.org/W2157978810","https://openalex.org/W2150291671","https://openalex.org/W2149404148","https://openalex.org/W2146343568","https://openalex.org/W2058965144","https://openalex.org/W2027972911","https://openalex.org/W2013643406"],"abstract_inverted_index":{"While":[0],"GPUs":[1],"are":[2,85],"meantime":[3],"omnipresent":[4],"for":[5,66,198],"many":[6],"scientific":[7],"and":[8,49,143,150,166,192],"technical":[9],"computations,":[10],"they":[11,77],"still":[12],"continue":[13],"to":[14,25,98,106,133,159,169,190],"evolve":[15],"as":[16,88],"processors.":[17],"An":[18],"important":[19],"recent":[20],"feature":[21],"is":[22,119,131,205],"the":[23,40,44,54,99,107,113,135,153,172],"ability":[24],"execute":[26],"multiple":[27],"kernels":[28,71,137,145,165],"concurrently":[29],"via":[30],"queue":[31],"streams.":[32],"However,":[33,93],"experiments":[34],"show":[35],"that":[36],"different":[37,63,74,82,100,103,147,164],"parameters":[38],"including":[39],"behavior":[41],"of":[42,46,56,115,163,185,188,196,202],"kernels,":[43],"order":[45],"kernel":[47,68,95,116,154],"launches":[48],"other":[50],"execution":[51,64,148,186],"configurations,":[52],"e.g.,":[53],"number":[55],"concurrent":[57,67,128,136],"thread":[58,161],"blocks,":[59],"may":[60,72,96],"result":[61],"in":[62,183],"time":[65,187],"execution.":[69],"Since":[70],"have":[73],"resource":[75,155],"requirements,":[76],"can":[78],"be":[79],"classified":[80],"into":[81,157],"classes,":[83],"which":[84],"traditionally":[86],"assumed":[87],"either":[89],"memory-bound":[90],"or":[91],"compute-bound.":[92],"a":[94,124,199],"belong":[97],"classes":[101],"on":[102,122,177],"hardware":[104,108,179],"according":[105],"resources.":[109,174],"In":[110],"this":[111],"paper,":[112],"definition":[114],"mix":[117],"intensity":[118],"introduced.":[120],"Based":[121],"this,":[123],"scheduling":[125],"framework":[126],"called":[127],"CUDA":[129],"(cCUDA)":[130],"proposed":[132],"co-schedule":[134],"more":[138],"efficiently.":[139],"It":[140],"first":[141],"profiles":[142],"ranks":[144],"with":[146],"behaviors":[149],"then":[151],"takes":[152],"requirements":[156],"account":[158],"partition":[160],"blocks":[162],"overlap":[167],"them":[168],"better":[170],"utilize":[171],"GPU":[173],"Experimental":[175],"results":[176],"real":[178],"demonstrate":[180],"performance":[181],"improvement":[182],"terms":[184],"up":[189],"1.86x,":[191],"an":[193],"average":[194],"speedup":[195],"1.28x":[197],"wide":[200],"range":[201],"kernels.":[203],"cCUDA":[204],"available":[206],"at":[207],"https://github.com/kshekofteh/cCUDA.":[208]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2979041789","counts_by_year":[{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":6}],"updated_date":"2024-12-20T11:20:17.751566","created_date":"2019-10-10"}