{"id":"https://openalex.org/W4400141862","doi":"https://doi.org/10.48550/arxiv.2406.19113","title":"MegIS: High-Performance, Energy-Efficient, and Low-Cost Metagenomic\n Analysis with In-Storage Processing","display_name":"MegIS: High-Performance, Energy-Efficient, and Low-Cost Metagenomic\n Analysis with In-Storage Processing","publication_year":2024,"publication_date":"2024-06-27","ids":{"openalex":"https://openalex.org/W4400141862","doi":"https://doi.org/10.48550/arxiv.2406.19113"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.19113","pdf_url":"http://arxiv.org/pdf/2406.19113","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2406.19113","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048764555","display_name":"Nika Mansouri Ghiasi","orcid":"https://orcid.org/0000-0002-0833-0042"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ghiasi, Nika Mansouri","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008240365","display_name":"Mohammad Sadrosadati","orcid":"https://orcid.org/0000-0002-4029-0175"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sadrosadati, Mohammad","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033347097","display_name":"Harun Mustafa","orcid":"https://orcid.org/0000-0002-2125-6086"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mustafa, Harun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052807455","display_name":"Arvid Gollwitzer","orcid":"https://orcid.org/0009-0001-2170-8089"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gollwitzer, Arvid","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066286509","display_name":"Can F\u0131rt\u0131na","orcid":"https://orcid.org/0000-0002-6548-7863"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Firtina, Can","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007205853","display_name":"Julien Eudine","orcid":"https://orcid.org/0000-0001-6482-0110"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Eudine, Julien","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008354867","display_name":"Haiyu Mao","orcid":"https://orcid.org/0000-0002-7393-4504"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mao, Haiyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042072020","display_name":"Jo\u00ebl Lindegger","orcid":"https://orcid.org/0000-0003-2581-8637"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lindegger, Jo\u00ebl","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060363985","display_name":"Meryem Banu Cavlak","orcid":"https://orcid.org/0000-0003-4475-6945"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cavlak, Meryem Banu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081055664","display_name":"Mohammed Alser","orcid":"https://orcid.org/0000-0002-6117-3701"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alser, Mohammed","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010929521","display_name":"Jisung Park","orcid":"https://orcid.org/0000-0002-1826-9003"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Jisung","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5050695684","display_name":"Onur Mutlu","orcid":"https://orcid.org/0000-0002-0075-2312"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mutlu, Onur","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.586,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.586,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C15151743","wikidata":"https://www.wikidata.org/wiki/Q903778","display_name":"Metagenomics","level":3,"score":0.67226595},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5334421},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.13771647},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.19113","pdf_url":"http://arxiv.org/pdf/2406.19113","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.19113","pdf_url":"http://arxiv.org/pdf/2406.19113","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W638577851","https://openalex.org/W4391375266","https://openalex.org/W4388282505","https://openalex.org/W4242417330","https://openalex.org/W3135997498","https://openalex.org/W3013372948","https://openalex.org/W2968354375","https://openalex.org/W2889550857","https://openalex.org/W2748952813","https://openalex.org/W2359440920"],"abstract_inverted_index":{"Metagenomics":[0],"has":[1],"led":[2],"to":[3,44,79,90,106,223],"significant":[4,39],"advances":[5],"in":[6,21,86],"many":[7],"fields.":[8],"Metagenomic":[9,35],"analysis":[10,36,81,117,189],"commonly":[11],"involves":[12],"the":[13,18,52,91,100,109,114,135,197,212,215,224],"key":[14],"tasks":[15,29],"of":[16,48,94,113,174,178,214],"determining":[17],"species":[19],"present":[20],"a":[22,59],"sample":[23],"and":[24,129,133,147,164,182,200,207],"their":[25],"relative":[26],"abundances.":[27],"These":[28],"require":[30],"searching":[31],"large":[32,46],"metagenomic":[33,80,116,179,188,203,226],"databases.":[34],"suffers":[37],"from":[38,51],"data":[40,50,110,162],"movement":[41,111],"overhead":[42,112],"due":[43,89],"moving":[45],"amounts":[47],"low-reuse":[49],"storage":[53,87,136,157],"system.":[54,137],"In-storage":[55],"processing":[56,70,103,131,141],"can":[57,183],"be":[58,83,184],"fundamental":[60],"solution":[61],"for":[62,72,143],"reducing":[63],"this":[64],"overhead.":[65],"However,":[66],"designing":[67],"an":[68],"in-storage":[69,102,140,167],"system":[71,104],"metagenomics":[73,144],"is":[74,120,171],"challenging":[75],"because":[76],"existing":[77],"approaches":[78],"cannot":[82],"directly":[84],"implemented":[85],"effectively":[88,127],"hardware":[92],"limitations":[93],"modern":[95],"SSDs.":[96],"We":[97,138],"propose":[98],"MegIS,":[99],"first":[101],"designed":[105],"significantly":[107,233],"reduce":[108],"end-to-end":[115],"pipeline.":[118],"MegIS":[119,195,218],"enabled":[121],"by":[122,205],"our":[123],"lightweight":[124,166],"design":[125,170],"that":[126,194],"leverages":[128],"orchestrates":[130],"inside":[132],"outside":[134],"address":[139],"challenges":[142],"via":[145],"specialized":[146],"efficient":[148],"1)":[149],"task":[150],"partitioning,":[151],"2)":[152],"data/computation":[153],"flow":[154],"coordination,":[155],"3)":[156],"technology-aware":[158],"algorithmic":[159],"optimizations,":[160],"4)":[161],"mapping,":[163],"5)":[165],"accelerators.":[168],"MegIS's":[169],"flexible,":[172],"capable":[173],"supporting":[175],"different":[176],"types":[177],"input":[180],"datasets,":[181],"integrated":[185],"into":[186],"various":[187],"pipelines.":[190],"Our":[191],"evaluation":[192],"shows":[193],"outperforms":[196],"state-of-the-art":[198,225],"performance-":[199],"accuracy-optimized":[201,216],"software":[202],"tools":[204],"2.7$\\times$-37.2$\\times$":[206],"6.9$\\times$-100.2$\\times$,":[208],"respectively,":[209],"while":[210,231],"matching":[211],"accuracy":[213],"tool.":[217],"achieves":[219],"1.5$\\times$-5.1$\\times$":[220],"speedup":[221],"compared":[222],"hardware-accelerated":[227],"(using":[228],"processing-in-memory)":[229],"tool,":[230],"achieving":[232],"higher":[234],"accuracy.":[235]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4400141862","counts_by_year":[],"updated_date":"2024-12-10T13:58:27.959718","created_date":"2024-06-29"}