{"id":"https://openalex.org/W4400703021","doi":"https://doi.org/10.48550/arxiv.2407.09941","title":"Hydra: Bidirectional State Space Models Through Generalized Matrix\n Mixers","display_name":"Hydra: Bidirectional State Space Models Through Generalized Matrix\n Mixers","publication_year":2024,"publication_date":"2024-07-13","ids":{"openalex":"https://openalex.org/W4400703021","doi":"https://doi.org/10.48550/arxiv.2407.09941"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.09941","pdf_url":"http://arxiv.org/pdf/2407.09941","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2407.09941","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010331532","display_name":"Sukjun Hwang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hwang, Sukjun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104621327","display_name":"Aakash Lahoti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lahoti, Aakash","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091734792","display_name":"Tri Dao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dao, Tri","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5025386668","display_name":"Albert Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gu, Albert","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.910866,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":78,"max":89},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.897,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.897,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12592","display_name":"Opinion Dynamics and Social Influence","score":0.8373,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lernaean-hydra","display_name":"Lernaean Hydra","score":0.836059},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.59932667},{"id":"https://openalex.org/keywords/matrix-model","display_name":"Matrix model","score":0.5611622}],"concepts":[{"id":"https://openalex.org/C34781881","wikidata":"https://www.wikidata.org/wiki/Q170379","display_name":"Lernaean Hydra","level":2,"score":0.836059},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.6269766},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.599861},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.59932667},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.58311343},{"id":"https://openalex.org/C2988889268","wikidata":"https://www.wikidata.org/wiki/Q6787876","display_name":"Matrix model","level":3,"score":0.5611622},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.447258},{"id":"https://openalex.org/C184720557","wikidata":"https://www.wikidata.org/wiki/Q7825049","display_name":"Topology (electrical circuits)","level":2,"score":0.3797173},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.30194384},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.28668752},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.20642096},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.19524023},{"id":"https://openalex.org/C33332235","wikidata":"https://www.wikidata.org/wiki/Q18362","display_name":"Theoretical physics","level":1,"score":0.19400623},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.09171799},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.085920155},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.07073611},{"id":"https://openalex.org/C95444343","wikidata":"https://www.wikidata.org/wiki/Q7141","display_name":"Cell biology","level":1,"score":0.060703576},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.09941","pdf_url":"http://arxiv.org/pdf/2407.09941","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.09941","pdf_url":"http://arxiv.org/pdf/2407.09941","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W646412623","https://openalex.org/W643538513","https://openalex.org/W2611307519","https://openalex.org/W2381253015","https://openalex.org/W2268219566","https://openalex.org/W2141305934","https://openalex.org/W2128040437","https://openalex.org/W2093732113","https://openalex.org/W2008467200","https://openalex.org/W1859893720"],"abstract_inverted_index":{"A":[0],"wide":[1],"array":[2],"of":[3,30,51,58,85,95,107,116,157],"sequence":[4,16,31,53,99,135,147,174],"models":[5,71,175],"are":[6],"built":[7],"on":[8,41,178,194,204],"a":[9,25,38,48,92,130,153,164,182],"framework":[10,46,128],"modeled":[11],"after":[12],"Transformers,":[13],"comprising":[14],"alternating":[15],"mixer":[17,20,28,127],"and":[18,73,81,105,118,198],"channel":[19],"layers.":[21],"This":[22,45],"paper":[23],"studies":[24],"unifying":[26],"matrix":[27,88,96,108,126,166],"view":[29],"mixers":[32,136],"that":[33],"can":[34],"be":[35],"conceptualized":[36],"as":[37,60,62,67,79,122,163],"linear":[39],"map":[40],"the":[42,56,103,113,125,158,195],"input":[43],"sequence.":[44],"encompasses":[47],"broad":[49],"range":[50],"well-known":[52],"models,":[54],"including":[55,176],"self-attention":[57],"Transformers":[59,117,177],"well":[61],"recent":[63,119],"strong":[64,114],"alternatives":[65],"such":[66,78,121],"structured":[68,87],"state":[69],"space":[70],"(SSMs),":[72],"allows":[74],"understanding":[75],"downstream":[76],"characteristics":[77],"efficiency":[80],"expressivity":[82],"through":[83],"properties":[84],"their":[86],"class.":[89],"We":[90],"identify":[91],"key":[93],"axis":[94],"parameterizations":[97],"termed":[98],"alignment,":[100],"which":[101,168],"increases":[102],"flexibility":[104],"performance":[106,115,171],"mixers,":[109],"providing":[110],"insights":[111],"into":[112],"SSMs":[120],"Mamba.":[123],"Furthermore,":[124],"offers":[129],"systematic":[131],"approach":[132],"to":[133,142],"developing":[134],"with":[137],"desired":[138],"properties,":[139],"allowing":[140],"us":[141],"develop":[143],"several":[144],"new":[145],"sub-quadratic":[146],"models.":[148],"In":[149],"particular,":[150],"we":[151],"propose":[152],"natural":[154],"bidirectional":[155],"extension":[156],"Mamba":[159],"model":[160],"(Hydra),":[161],"parameterized":[162],"quasiseparable":[165],"mixer,":[167],"demonstrates":[169],"superior":[170],"over":[172],"other":[173],"non-causal":[179],"tasks.":[180],"As":[181],"drop-in":[183],"replacement":[184],"for":[185],"attention":[186],"layers,":[187],"Hydra":[188],"outperforms":[189],"BERT":[190],"by":[191,200],"0.8":[192],"points":[193],"GLUE":[196],"benchmark":[197],"ViT":[199],"2%":[201],"Top-1":[202],"accuracy":[203],"ImageNet.":[205]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4400703021","counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-04-04T00:32:07.065776","created_date":"2024-07-17"}