{"id":"https://openalex.org/W2004775621","doi":"https://doi.org/10.1109/trustcom.2013.227","title":"An Automatic Parallel-Stage Decoupled Software Pipelining Parallelization Algorithm Based on OpenMP","display_name":"An Automatic Parallel-Stage Decoupled Software Pipelining Parallelization Algorithm Based on OpenMP","publication_year":2013,"publication_date":"2013-07-01","ids":{"openalex":"https://openalex.org/W2004775621","doi":"https://doi.org/10.1109/trustcom.2013.227","mag":"2004775621"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/trustcom.2013.227","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101937981","display_name":"Xiaoxian Liu","orcid":"https://orcid.org/0000-0001-6386-1527"},"institutions":[],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoxian Liu","raw_affiliation_strings":["State Key Lab. of Math. Eng., Adv. Comput., Zhengzhou, China"],"affiliations":[{"raw_affiliation_string":"State Key Lab. of Math. Eng., Adv. Comput., Zhengzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113746674","display_name":"Rongcai Zhao","orcid":null},"institutions":[],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rongcai Zhao","raw_affiliation_strings":["State Key Lab. of Math. Eng., Adv. Comput., Zhengzhou, China"],"affiliations":[{"raw_affiliation_string":"State Key Lab. of Math. Eng., Adv. Comput., Zhengzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103153385","display_name":"Lin Han","orcid":"https://orcid.org/0000-0001-7666-0150"},"institutions":[],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lin Han","raw_affiliation_strings":["State Key Lab. of Math. Eng., Adv. Comput., Zhengzhou, China"],"affiliations":[{"raw_affiliation_string":"State Key Lab. of Math. Eng., Adv. Comput., Zhengzhou, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100346828","display_name":"Peng Liu","orcid":"https://orcid.org/0000-0002-5091-8464"},"institutions":[],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Liu","raw_affiliation_strings":["State Key Lab. of Math. Eng., Adv. Comput., Zhengzhou, China"],"affiliations":[{"raw_affiliation_string":"State Key Lab. of Math. Eng., Adv. Comput., Zhengzhou, China","institution_ids":[]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.158,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":1,"citation_normalized_percentile":{"value":0.200227,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":65,"max":72},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Performance Optimization","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Performance Optimization","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed Grid Computing Systems","score":0.9975,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Reconfigurable Computing Systems and Design Methods","score":0.9925,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/software-pipelining","display_name":"Software pipelining","score":0.6163449},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel Computing","score":0.606331},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.5976675},{"id":"https://openalex.org/keywords/automatic-parallelization","display_name":"Automatic parallelization","score":0.59151703},{"id":"https://openalex.org/keywords/multicore-architectures","display_name":"Multicore Architectures","score":0.589399},{"id":"https://openalex.org/keywords/distributed-systems","display_name":"Distributed Systems","score":0.532022},{"id":"https://openalex.org/keywords/control-flow","display_name":"Control flow","score":0.5293299},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.52781594},{"id":"https://openalex.org/keywords/task-scheduling","display_name":"Task Scheduling","score":0.527409},{"id":"https://openalex.org/keywords/performance-optimization","display_name":"Performance Optimization","score":0.520832},{"id":"https://openalex.org/keywords/control-flow-graph","display_name":"Control flow graph","score":0.43426728}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8825429},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8047159},{"id":"https://openalex.org/C188854837","wikidata":"https://www.wikidata.org/wiki/Q268469","display_name":"Software pipelining","level":3,"score":0.6163449},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.5976675},{"id":"https://openalex.org/C164833996","wikidata":"https://www.wikidata.org/wiki/Q2323839","display_name":"Automatic parallelization","level":3,"score":0.59151703},{"id":"https://openalex.org/C160191386","wikidata":"https://www.wikidata.org/wiki/Q868299","display_name":"Control flow","level":2,"score":0.5293299},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.52781594},{"id":"https://openalex.org/C150202949","wikidata":"https://www.wikidata.org/wiki/Q107602","display_name":"Pointer (user interface)","level":2,"score":0.4656542},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4608386},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.45440122},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.44888085},{"id":"https://openalex.org/C27458966","wikidata":"https://www.wikidata.org/wiki/Q1187693","display_name":"Control flow graph","level":2,"score":0.43426728},{"id":"https://openalex.org/C140763907","wikidata":"https://www.wikidata.org/wiki/Q2714055","display_name":"Instruction-level parallelism","level":3,"score":0.42613548},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.41611782},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.2612474},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.15672904},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.1546956},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.12742138},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/trustcom.2013.227","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":16,"referenced_works":["https://openalex.org/W1494930385","https://openalex.org/W152868167","https://openalex.org/W161813961","https://openalex.org/W1976397984","https://openalex.org/W2028240990","https://openalex.org/W2069782598","https://openalex.org/W2104869448","https://openalex.org/W2128249697","https://openalex.org/W2144344516","https://openalex.org/W2159644421","https://openalex.org/W2167089275","https://openalex.org/W2171546589","https://openalex.org/W2315105672","https://openalex.org/W2382869634","https://openalex.org/W4242951998","https://openalex.org/W79448102"],"related_works":["https://openalex.org/W2993981457","https://openalex.org/W2168093120","https://openalex.org/W2166895275","https://openalex.org/W2138385884","https://openalex.org/W2109400628","https://openalex.org/W2104983349","https://openalex.org/W2052778103","https://openalex.org/W2029829152","https://openalex.org/W2020532170","https://openalex.org/W1582417364"],"abstract_inverted_index":{"While":[0],"multicore":[1,34,207],"processors":[2],"increase":[3],"throughput":[4],"for":[5],"multi-programmed":[6],"and":[7,16,45,98,105,176,195,216,233],"multithreaded":[8],"codes,":[9],"many":[10],"important":[11,26],"applications":[12,32],"are":[13],"single":[14],"threaded":[15,31],"thus":[17],"do":[18],"not":[19],"benefit.":[20],"Automatic":[21],"parallelization":[22,56],"techniques":[23,57],"play":[24],"an":[25,111],"role":[27],"in":[28,49,72,118,143,170],"migrating":[29],"singe":[30],"to":[33,66,173],"platforms.":[35],"Unfortunately,":[36],"the":[37,53,76,88,137,144,150,158,189],"prevalence":[38],"of":[39,78,81,95,101],"control":[40,85,196],"flow,":[41,197],"recursive":[42],"data":[43],"structures,":[44],"general":[46],"pointer":[47],"accesses":[48],"ordinary":[50,73],"programs":[51,74],"renders":[52],"traditional":[54,204],"automatic":[55],"unsuitable.":[58],"Parallel-Stage":[59],"Decoupled":[60],"Software":[61],"Pipelining":[62],"(PS-DSWP)":[63],"is":[64,122,146,168],"proposed":[65],"exploit":[67],"fine-grained":[68],"pipeline":[69],"parallelism":[70,156],"lurking":[71],"with":[75,162,191,202,221,237],"existence":[77],"all":[79],"kinds":[80],"dependences,":[82,86],"including":[83],"arbitrary":[84],"at":[87],"instruction":[89],"level.":[90],"But":[91],"it":[92],"requires":[93],"knowledge":[94],"architectural":[96],"properties":[97],"hardware":[99,185],"support":[100],"a":[102,131,210,226],"communication":[103],"channel":[104],"two":[106],"special":[107],"instructions.":[108,166],"We":[109,187,224],"propose":[110],"improved":[112],"PS-DSWP":[113,160],"algorithm":[114,145,172],"based":[115,148,164],"on":[116,126,149,165,184,206,234],"OpenMP":[117,167],"this":[119],"paper.":[120],"It":[121],"implemented":[123],"without":[124],"relying":[125],"CPU":[127],"architectures":[128],"by":[129,203],"using":[130],"high":[132,230],"level":[133],"intermediate":[134],"representation.":[135],"Moreover,":[136],"Program":[138],"Dependence":[139],"Graph":[140],"(PDG)":[141],"used":[142],"built":[147],"basic":[151],"blocks,":[152],"which":[153,198],"exploits":[154],"coarser-grained":[155],"than":[157],"original":[159],"transformation":[161],"PDG":[163],"employed":[169],"our":[171,222],"assign":[174],"task":[175],"implement":[177],"synchronization":[178],"among":[179],"threads":[180],"while":[181],"avoiding":[182],"dependence":[183],"support.":[186],"evaluate":[188],"loops":[190],"complex":[192],"memory":[193],"patterns":[194],"cannot":[199],"be":[200,214],"dealt":[201],"techniques,":[205],"platform.":[208],"As":[209],"result,":[211],"they":[212],"can":[213],"parallelized":[215],"gain":[217],"significant":[218],"performance":[219],"improvement":[220],"algorithm.":[223],"obtain":[225],"maximum":[227],"speedup":[228],"as":[229,231],"2.07x":[232],"average":[235],"1.39x":[236],"5":[238],"threads.":[239]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2004775621","counts_by_year":[{"year":2015,"cited_by_count":1}],"updated_date":"2024-11-23T12:28:24.887179","created_date":"2016-06-24"}