{"id":"https://openalex.org/W2898845184","doi":"https://doi.org/10.1109/hpcs.2018.00086","title":"Modern Generative Programming for Optimizing Small Matrix-Vector Multiplication","display_name":"Modern Generative Programming for Optimizing Small Matrix-Vector Multiplication","publication_year":2018,"publication_date":"2018-07-01","ids":{"openalex":"https://openalex.org/W2898845184","doi":"https://doi.org/10.1109/hpcs.2018.00086","mag":"2898845184"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpcs.2018.00086","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013636368","display_name":"Jules P\u00e9nuchot","orcid":"https://orcid.org/0000-0002-6377-6880"},"institutions":[{"id":"https://openalex.org/I102197404","display_name":"Universit\u00e9 Paris-Sud","ror":"https://ror.org/028rypz17","country_code":"FR","type":"education","lineage":["https://openalex.org/I102197404"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Jules Penuchot","raw_affiliation_strings":["University of Paris-Sud, Orsay, France"],"affiliations":[{"raw_affiliation_string":"University of Paris-Sud, Orsay, France","institution_ids":["https://openalex.org/I102197404"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035522197","display_name":"Jo\u00ebl Falcou","orcid":"https://orcid.org/0000-0001-5380-7375"},"institutions":[{"id":"https://openalex.org/I102197404","display_name":"Universit\u00e9 Paris-Sud","ror":"https://ror.org/028rypz17","country_code":"FR","type":"education","lineage":["https://openalex.org/I102197404"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Joel Falcou","raw_affiliation_strings":["University of Paris-Sud, Orsay, France"],"affiliations":[{"raw_affiliation_string":"University of Paris-Sud, Orsay, France","institution_ids":["https://openalex.org/I102197404"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013949089","display_name":"Amal Khabou","orcid":null},"institutions":[{"id":"https://openalex.org/I102197404","display_name":"Universit\u00e9 Paris-Sud","ror":"https://ror.org/028rypz17","country_code":"FR","type":"education","lineage":["https://openalex.org/I102197404"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Amal Khabou","raw_affiliation_strings":["University of Paris-Sud, Orsay, France"],"affiliations":[{"raw_affiliation_string":"University of Paris-Sud, Orsay, France","institution_ids":["https://openalex.org/I102197404"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.193,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":3,"citation_normalized_percentile":{"value":0.395818,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":74,"max":77},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9959,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9892,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/loop-unrolling","display_name":"Loop unrolling","score":0.764493},{"id":"https://openalex.org/keywords/x86","display_name":"x86","score":0.7226901},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6301712},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.522457},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4366692}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8237796},{"id":"https://openalex.org/C76970557","wikidata":"https://www.wikidata.org/wiki/Q1869750","display_name":"Loop unrolling","level":3,"score":0.764493},{"id":"https://openalex.org/C170723468","wikidata":"https://www.wikidata.org/wiki/Q182933","display_name":"x86","level":3,"score":0.7226901},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6687985},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6301712},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.522457},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.46024698},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4366692},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.42058054},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.36581814},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.33724725},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.23160538},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.20794025},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.18385106},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.092282236},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpcs.2018.00086","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.69,"display_name":"Quality education"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":20,"referenced_works":["https://openalex.org/W1489689515","https://openalex.org/W1500250067","https://openalex.org/W1534913787","https://openalex.org/W1598340254","https://openalex.org/W1650987719","https://openalex.org/W2009775190","https://openalex.org/W2050090934","https://openalex.org/W2064872546","https://openalex.org/W2088427879","https://openalex.org/W2125960020","https://openalex.org/W2252007067","https://openalex.org/W2274477847","https://openalex.org/W2291372353","https://openalex.org/W2623469754","https://openalex.org/W2911646362","https://openalex.org/W2913602891","https://openalex.org/W4229666556","https://openalex.org/W4242122368","https://openalex.org/W4246781209","https://openalex.org/W70764853"],"related_works":["https://openalex.org/W752783541","https://openalex.org/W4312354936","https://openalex.org/W4287593139","https://openalex.org/W4239424132","https://openalex.org/W4206811032","https://openalex.org/W3099313426","https://openalex.org/W3008658471","https://openalex.org/W2995605830","https://openalex.org/W2735758428","https://openalex.org/W1506547947"],"abstract_inverted_index":{"BLAS-level":[0],"functions":[1],"are":[2,77,139],"the":[3,29,63,99,121],"cornerstone":[4],"of":[5,9,15,22,35,65,101,128,156],"a":[6,12,47,85,117,145],"large":[7,13],"subset":[8],"applications.":[10],"If":[11],"body":[14],"work":[16],"surrounding":[17],"efficient":[18,69,141],"and":[19,59,111,142],"large-scale":[20],"implementation":[21],"some":[23],"routines":[24,37],"such":[25,56,74],"as":[26,57],"gemv":[27,105,123],"exists,":[28],"interest":[30],"for":[31,73,107],"small-sized,":[32],"highly-optimized":[33],"versions":[34],"those":[36],"emerged.":[38],"In":[39,95],"this":[40],"paper,":[41],"we":[42,97],"propose":[43],"to":[44,91,133,150],"show":[45,115],"how":[46],"modern":[48],"C++":[49,137],"approach":[50],"based":[51],"on":[52,125],"generative":[53],"programming":[54,88],"techniques":[55],"vectorization":[58],"loop":[60],"unrolling":[61],"in":[62],"framework":[64],"meta-programming":[66],"can":[67],"deliver":[68],"automatically":[70,102],"generated":[71,103],"codes":[72],"routines,":[75],"that":[76,135,147,155],"competitive":[78],"with":[79,84,120],"existing,":[80],"hand-tuned":[81],"library":[82],"kernels":[83,106,138],"very":[86,140],"low":[87],"effort":[89],"compared":[90],"writing":[92],"assembly":[93],"code.":[94],"particular,":[96],"analyze":[98],"performance":[100,118,146],"small-sized":[104],"both":[108],"Intel":[109],"x86":[110],"ARM":[112],"processors.":[113],"We":[114],"through":[116],"comparison":[119],"OpenBLAS":[122,157],"kernel":[124],"small":[126],"matrices":[127],"sizes":[129],"ranging":[130],"from":[131],"4":[132],"32":[134],"our":[136],"may":[143],"have":[144],"is":[148],"up":[149],"3":[151],"times":[152],"better":[153],"than":[154],"gemv.":[158]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2898845184","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2025-01-05T12:33:10.742514","created_date":"2018-11-09"}