{"id":"https://openalex.org/W2104508714","doi":"https://doi.org/10.1145/1450095.1450121","title":"Efficient vectorization of SIMD programs with non-aligned and irregular data access hardware","display_name":"Efficient vectorization of SIMD programs with non-aligned and irregular data access hardware","publication_year":2008,"publication_date":"2008-10-19","ids":{"openalex":"https://openalex.org/W2104508714","doi":"https://doi.org/10.1145/1450095.1450121","mag":"2104508714"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/1450095.1450121","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111982138","display_name":"Hoseok Chang","orcid":null},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hoseok Chang","raw_affiliation_strings":["Seoul National University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113491293","display_name":"Wonyong Sung","orcid":null},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Wonyong Sung","raw_affiliation_strings":["Seoul National University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.069,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":27,"citation_normalized_percentile":{"value":0.87617,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":91},"biblio":{"volume":null,"issue":null,"first_page":"167","last_page":"176"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9995,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9994,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.8774241},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.81770456},{"id":"https://openalex.org/keywords/data-access","display_name":"Data access","score":0.48076078},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.42149812},{"id":"https://openalex.org/keywords/instruction-prefetch","display_name":"Instruction prefetch","score":0.4143755}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8932066},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.8774241},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.81770456},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.77185255},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.73431677},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.6006247},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.48076078},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.4484034},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.42620873},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.42149812},{"id":"https://openalex.org/C133588205","wikidata":"https://www.wikidata.org/wiki/Q28455645","display_name":"Instruction prefetch","level":3,"score":0.4143755},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.33720076},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.14077854},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10166898},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/1450095.1450121","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":24,"referenced_works":["https://openalex.org/W1495550651","https://openalex.org/W1581253957","https://openalex.org/W1965638741","https://openalex.org/W1977857710","https://openalex.org/W2040243676","https://openalex.org/W2042409850","https://openalex.org/W2061171222","https://openalex.org/W2069703635","https://openalex.org/W2079834329","https://openalex.org/W2086393337","https://openalex.org/W2092425166","https://openalex.org/W2096648046","https://openalex.org/W2099404643","https://openalex.org/W2113558024","https://openalex.org/W2117285153","https://openalex.org/W2120218709","https://openalex.org/W2137249790","https://openalex.org/W2166128308","https://openalex.org/W2167639788","https://openalex.org/W2217166680","https://openalex.org/W2286684487","https://openalex.org/W2600258283","https://openalex.org/W4245302940","https://openalex.org/W63944998"],"related_works":["https://openalex.org/W75461624","https://openalex.org/W4285390450","https://openalex.org/W4244894488","https://openalex.org/W3024308452","https://openalex.org/W2566637483","https://openalex.org/W2366442643","https://openalex.org/W2127324789","https://openalex.org/W2090268225","https://openalex.org/W2021715972","https://openalex.org/W1509422975"],"abstract_inverted_index":{"Automatic":[0],"vectorization":[1],"of":[2,16,69,104,169,179],"programs":[3],"for":[4,41,65],"partitioned-ALU":[5],"SIMD":[6,70],"(Single":[7],"Instruction":[8],"Multiple":[9],"Data)":[10],"processors":[11],"has":[12],"been":[13,151],"difficult":[14],"because":[15],"not":[17,108],"only":[18,109],"data":[19,27,34,42,100],"dependency":[20],"issues":[21],"but":[22,119],"also":[23,120,138],"non-aligned":[24,31,89],"and":[25,52,78,98,131,172],"irregular":[26,33,97],"access":[28,35,63,91],"problems.":[29],"A":[30],"or":[32],"operation":[36],"incurs":[37],"many":[38],"overhead":[39,134],"cycles":[40],"alignment.":[43],"Moreover,":[44],"this":[45,57],"causes":[46],"difficulty":[47],"in":[48,166,175],"efficient":[49],"code":[50],"generation":[51],"hinders":[53],"automatic":[54],"vectorization.":[55],"In":[56],"paper,":[58],"we":[59],"employ":[60],"special":[61,146],"memory":[62,90],"hardware":[64,106,147],"improving":[66],"the":[67,74,79,82,88,94,115,123,133,155,159,167,176],"performance":[68,125,178],"processors;":[71],"one":[72],"is":[73,81],"split":[75],"line":[76],"buffer":[77],"other":[80],"packing":[83],"buffer.":[84],"The":[85,102],"former":[86],"solves":[87],"problem,":[92],"while":[93],"latter":[95],"simplifies":[96],"stride":[99],"access.":[101],"addition":[103],"these":[105,145],"units":[107],"requires":[110],"very":[111],"small":[112],"changes":[113],"to":[114,122,153],"instruction":[116],"set":[117],"architecture":[118],"contributes":[121],"significant":[124],"improvement":[126],"by":[127],"vectorizing":[128],"more":[129],"loops":[130,171],"reducing":[132],"cycles.":[135],"We":[136],"have":[137,150],"developed":[139],"an":[140,180],"auto-vectorization":[141],"compiler":[142],"which":[143,162],"utilizes":[144],"units.":[148],"Experiments":[149],"conducted":[152],"compare":[154],"proposed":[156],"method":[157],"with":[158],"conventional":[160],"one,":[161],"show":[163],"50%":[164],"increase":[165,174],"number":[168],"vectorized":[170],"77%":[173],"total":[177],"MPEG2":[181],"encoder":[182],"program.":[183]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2104508714","counts_by_year":[{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":1},{"year":2012,"cited_by_count":3}],"updated_date":"2024-12-17T15:30:48.845851","created_date":"2016-06-24"}