{"id":"https://openalex.org/W4392575276","doi":"https://doi.org/10.48550/arxiv.2403.03234","title":"Caduceus: Bi-Directional Equivariant Long-Range DNA Sequence Modeling","display_name":"Caduceus: Bi-Directional Equivariant Long-Range DNA Sequence Modeling","publication_year":2024,"publication_date":"2024-03-04","ids":{"openalex":"https://openalex.org/W4392575276","doi":"https://doi.org/10.48550/arxiv.2403.03234"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.03234","pdf_url":"http://arxiv.org/pdf/2403.03234","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2403.03234","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024311601","display_name":"Yair Schiff","orcid":"https://orcid.org/0000-0003-0748-3706"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schiff, Yair","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027761291","display_name":"Chia-Hsiang Kao","orcid":"https://orcid.org/0009-0006-1956-5265"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kao, Chia-Hsiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029160764","display_name":"Aaron Gokaslan","orcid":"https://orcid.org/0000-0002-3575-2961"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gokaslan, Aaron","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091734792","display_name":"Tri Dao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dao, Tri","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025386668","display_name":"Albert Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gu, Albert","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5021338648","display_name":"Volodymyr Kuleshov","orcid":"https://orcid.org/0000-0002-5150-3308"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kuleshov, Volodymyr","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.999938,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9077,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9077,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/equivariant-map","display_name":"Equivariant map","score":0.8062359},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.67090523}],"concepts":[{"id":"https://openalex.org/C171036898","wikidata":"https://www.wikidata.org/wiki/Q256355","display_name":"Equivariant map","level":2,"score":0.8062359},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.67090523},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.57496494},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3696168},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.32635802},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.27325255},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.2525959},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.20347172},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.15566966},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.073203534}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.03234","pdf_url":"http://arxiv.org/pdf/2403.03234","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.03234","pdf_url":"http://arxiv.org/pdf/2403.03234","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3211835374","https://openalex.org/W3169304289","https://openalex.org/W3148895720","https://openalex.org/W30258475","https://openalex.org/W2748952813","https://openalex.org/W2736697936","https://openalex.org/W2548611373","https://openalex.org/W2393913406","https://openalex.org/W2356579025","https://openalex.org/W1601809778"],"abstract_inverted_index":{"Large-scale":[0],"sequence":[1],"modeling":[2,15],"has":[3],"sparked":[4],"rapid":[5],"advances":[6],"that":[7,55,69,77,108,138],"now":[8],"extend":[9],"into":[10],"biology":[11],"and":[12,33,39,62,72,101,105],"genomics.":[13],"However,":[14],"genomic":[16],"sequences":[17],"introduces":[18],"challenges":[19,54],"such":[20],"as":[21,85],"the":[22,29,37,40,58,86,90,132],"need":[23],"to":[24,65,73],"model":[25],"long-range":[26,59,97,117,125],"token":[27],"interactions,":[28],"effects":[30],"of":[31,36,44,88,93,134],"upstream":[32],"downstream":[34,120],"regions":[35],"genome,":[38],"reverse":[41],"complementarity":[42],"(RC)":[43],"DNA.":[45],"Here,":[46],"we":[47,102],"propose":[48],"an":[49],"architecture":[50],"motivated":[51],"by":[52],"these":[53],"builds":[56],"off":[57],"Mamba":[60],"block,":[61],"extends":[63],"it":[64],"a":[66,74,123],"BiMamba":[67],"component":[68],"supports":[70,79],"bi-directionality,":[71],"MambaDNA":[75,84],"block":[76],"additionally":[78],"RC":[80,94],"equivariance.":[81,144],"We":[82],"use":[83],"basis":[87],"Caduceus,":[89],"first":[91],"family":[92],"equivariant":[95],"bi-directional":[96],"DNA":[98,111],"language":[99],"models,":[100],"introduce":[103],"pre-training":[104],"fine-tuning":[106],"strategies":[107],"yield":[109],"Caduceus":[110,114,130],"foundation":[112],"models.":[113],"outperforms":[115],"previous":[116],"models":[118,137],"on":[119,122],"benchmarks;":[121],"challenging":[124],"variant":[126],"effect":[127],"prediction":[128],"task,":[129],"exceeds":[131],"performance":[133],"10x":[135],"larger":[136],"do":[139],"not":[140],"leverage":[141],"bi-directionality":[142],"or":[143]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4392575276","counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":7}],"updated_date":"2025-04-09T00:39:41.742535","created_date":"2024-03-08"}