{"id":"https://openalex.org/W2048275941","doi":"https://doi.org/10.1109/pact.2011.59","title":"StVEC: A Vector Instruction Extension for High Performance Stencil Computation","display_name":"StVEC: A Vector Instruction Extension for High Performance Stencil Computation","publication_year":2011,"publication_date":"2011-10-01","ids":{"openalex":"https://openalex.org/W2048275941","doi":"https://doi.org/10.1109/pact.2011.59","mag":"2048275941"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/pact.2011.59","pdf_url":null,"source":{"id":"https://openalex.org/S4306419751","display_name":"International Conference on Parallel Architectures and Compilation Techniques","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://www.cs.ucla.edu/%7Epouchet/doc/pact-article.11.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029352910","display_name":"Naser Sedaghati","orcid":null},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Naser Sedaghati","raw_affiliation_strings":["Dept. of Comput. Sci. & Eng., Ohio State Univ., Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"Dept. of Comput. Sci. & Eng., Ohio State Univ., Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060668658","display_name":"Renji Thomas","orcid":null},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Renji Thomas","raw_affiliation_strings":["Dept. of Comput. Sci. & Eng., Ohio State Univ., Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"Dept. of Comput. Sci. & Eng., Ohio State Univ., Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040769038","display_name":"Louis-No\u00ebl Pouchet","orcid":"https://orcid.org/0000-0001-5103-3097"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Louis-No\u00ebl Pouchet","raw_affiliation_strings":["Dept. of Comput. Sci. & Eng., Ohio State Univ., Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"Dept. of Comput. Sci. & Eng., Ohio State Univ., Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103348809","display_name":"Radu Teodorescu","orcid":null},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Radu Teodorescu","raw_affiliation_strings":["Dept. of Comput. Sci. & Eng., Ohio State Univ., Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"Dept. of Comput. Sci. & Eng., Ohio State Univ., Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027517817","display_name":"P. Sadayappan","orcid":"https://orcid.org/0000-0002-4737-2034"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"P. Sadayappan","raw_affiliation_strings":["Dept. of Comput. Sci. & Eng., Ohio State Univ., Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"Dept. of Comput. Sci. & Eng., Ohio State Univ., Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.696,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":12,"citation_normalized_percentile":{"value":0.744052,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":86,"max":87},"biblio":{"volume":null,"issue":null,"first_page":"276","last_page":"287"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9994,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stencil","display_name":"Stencil","score":0.92232907},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.7557683},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.44063896}],"concepts":[{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.92232907},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8446499},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.82379234},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.7557683},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.71517515},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.64821},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.47365463},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.44063896},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.2725242},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.14116451},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.13080269}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/pact.2011.59","pdf_url":null,"source":{"id":"https://openalex.org/S4306419751","display_name":"International Conference on Parallel Architectures and Compilation Techniques","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.672.1742","pdf_url":"http://www.cs.ucla.edu/%7Epouchet/doc/pact-article.11.pdf","source":{"id":"https://openalex.org/S4306400349","display_name":"CiteSeer X (The Pennsylvania State University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I130769515","host_organization_name":"Pennsylvania State University","host_organization_lineage":["https://openalex.org/I130769515"],"host_organization_lineage_names":["Pennsylvania State University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.672.1742","pdf_url":"http://www.cs.ucla.edu/%7Epouchet/doc/pact-article.11.pdf","source":{"id":"https://openalex.org/S4306400349","display_name":"CiteSeer X (The Pennsylvania State University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I130769515","host_organization_name":"Pennsylvania State University","host_organization_lineage":["https://openalex.org/I130769515"],"host_organization_lineage_names":["Pennsylvania State University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":23,"referenced_works":["https://openalex.org/W1494930385","https://openalex.org/W1499418450","https://openalex.org/W1504320321","https://openalex.org/W1559264847","https://openalex.org/W1986704057","https://openalex.org/W1997147891","https://openalex.org/W2003798513","https://openalex.org/W2055253125","https://openalex.org/W2063249715","https://openalex.org/W2064508949","https://openalex.org/W2099404643","https://openalex.org/W2111914791","https://openalex.org/W2118031182","https://openalex.org/W2118533539","https://openalex.org/W2132020380","https://openalex.org/W2149236697","https://openalex.org/W2150319905","https://openalex.org/W2157758640","https://openalex.org/W2162570098","https://openalex.org/W2293033107","https://openalex.org/W2464177207","https://openalex.org/W4232919122","https://openalex.org/W4245302940"],"related_works":["https://openalex.org/W4300860486","https://openalex.org/W4285390450","https://openalex.org/W4244894488","https://openalex.org/W3205799311","https://openalex.org/W3092174096","https://openalex.org/W3024308452","https://openalex.org/W2979513934","https://openalex.org/W2566637483","https://openalex.org/W2127324789","https://openalex.org/W1559264847"],"abstract_inverted_index":{"Stencil":[0],"computations":[1,16,81],"comprise":[2],"the":[3,64,75,104,134,177,183,186,202],"compute-intensive":[4],"core":[5],"of":[6,14,23,52,108,125,137,176,185,191,201],"many":[7],"scientific":[8],"applications.":[9],"The":[10],"data":[11,21,58],"access":[12],"pattern":[13],"stencil":[15,80,143],"often":[17],"requires":[18],"several":[19],"adjacent":[20],"elements":[22],"arrays":[24,71],"to":[25,103,132],"be":[26,154],"accessed":[27],"in":[28,112],"innermost":[29],"parallel":[30],"loops.":[31],"Although":[32],"such":[33,114,163],"loops":[34],"are":[35,61,72],"vectorized":[36],"by":[37,156],"current":[38,138],"compilers":[39],"like":[40],"GCC":[41],"and":[42,128,193,210],"ICC":[43],"that":[44,152],"target":[45],"short-vector":[46,139],"SIMD":[47,140],"instruction":[48,179],"sets,":[49],"a":[50,93,148,157,173,206],"number":[51],"redundant":[53],"loads":[54],"or":[55],"additional":[56],"intra-register":[57],"shuffle":[59],"operations":[60],"required,":[62],"reducing":[63],"achievable":[65],"performance.":[66],"Thus,":[67],"even":[68],"when":[69],"all":[70],"cache":[73],"resident,":[74],"peak":[76],"performance":[77,135],"achieved":[78],"with":[79,162],"is":[82],"considerably":[83],"lower":[84],"than":[85],"machine":[86],"peak.":[87],"In":[88],"this":[89,97],"paper,":[90],"we":[91,146,181],"present":[92,147],"hardware-based":[94],"solution":[95],"for":[96,142,160],"problem.":[98],"We":[99,120,197],"propose":[100,121],"an":[101,122,164,168],"extension":[102],"standard":[105],"addressing":[106,127],"mode":[107,124],"vector":[109],"floating-point":[110],"instructions":[111,165],"ISAs":[113],"as":[115,170,172],"SSE,":[116],"AVX,":[117],"VMX":[118],"etc.":[119],"extended":[123],"paired-register":[126],"its":[129],"hardware":[130],"implementation,":[131],"overcome":[133],"limitation":[136],"ISA's":[141],"computations.":[144],"Further,":[145],"code":[149],"generation":[150],"approach":[151,188],"can":[153],"used":[155],"vectorizing":[158],"compiler":[159],"processors":[161],"set.":[166],"Using":[167],"optimistic":[169],"well":[171],"pessimistic":[174],"emulation":[175],"proposed":[178,187,203],"extension,":[180],"demonstrate":[182],"effectiveness":[184],"on":[189,214],"top":[190],"SSE":[192],"AVX":[194],"capable":[195],"processors.":[196],"also":[198],"synthesize":[199],"parts":[200],"design":[204],"using":[205],"45nm":[207],"CMOS":[208],"library":[209],"show":[211],"minimal":[212],"impact":[213],"processor":[215],"cycle":[216],"time.":[217]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2048275941","counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2025-01-04T14:21:55.468440","created_date":"2016-06-24"}