{"id":"https://openalex.org/W4393989706","doi":"https://doi.org/10.1186/s13015-024-00259-1","title":"Space-efficient computation of k-mer dictionaries for large values of k","display_name":"Space-efficient computation of k-mer dictionaries for large values of k","publication_year":2024,"publication_date":"2024-04-05","ids":{"openalex":"https://openalex.org/W4393989706","doi":"https://doi.org/10.1186/s13015-024-00259-1","pmid":"https://pubmed.ncbi.nlm.nih.gov/38581000"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13015-024-00259-1","pdf_url":"https://almob.biomedcentral.com/counter/pdf/10.1186/s13015-024-00259-1","source":{"id":"https://openalex.org/S205663195","display_name":"Algorithms for Molecular Biology","issn_l":"1748-7188","issn":["1748-7188"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310320256"],"host_organization_lineage_names":["Springer Nature","BioMed Central"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://almob.biomedcentral.com/counter/pdf/10.1186/s13015-024-00259-1","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039549554","display_name":"Diego D\u00edaz-Dom\u00ednguez","orcid":"https://orcid.org/0000-0002-9071-0254"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Diego D\u00edaz-Dom\u00ednguez","raw_affiliation_strings":["Department of Computer Science, University of Helsinki, Pietari Kalmin katu 5, 00014, Helsinki, Finland"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Helsinki, Pietari Kalmin katu 5, 00014, Helsinki, Finland","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021976860","display_name":"Miika Leinonen","orcid":"https://orcid.org/0000-0001-9386-0963"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Miika Leinonen","raw_affiliation_strings":["Department of Computer Science, University of Helsinki, Pietari Kalmin katu 5, 00014, Helsinki, Finland"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Helsinki, Pietari Kalmin katu 5, 00014, Helsinki, Finland","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091190630","display_name":"Leena Salmela","orcid":"https://orcid.org/0000-0002-0756-543X"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Leena Salmela","raw_affiliation_strings":["Department of Computer Science, University of Helsinki, Pietari Kalmin katu 5, 00014, Helsinki, Finland"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Helsinki, Pietari Kalmin katu 5, 00014, Helsinki, Finland","institution_ids":["https://openalex.org/I133731052"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5039549554"],"corresponding_institution_ids":["https://openalex.org/I133731052"],"apc_list":{"value":1490,"currency":"GBP","value_usd":1827,"provenance":"doaj"},"apc_paid":{"value":1490,"currency":"GBP","value_usd":1827,"provenance":"doaj"},"fwci":1.778,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":1,"citation_normalized_percentile":{"value":0.999975,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":84,"max":93},"biblio":{"volume":"19","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9945,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12029","display_name":"DNA and Biological Computing","score":0.9882,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.48049247}],"concepts":[{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.62274885},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.54547226},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.48049247},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35236198},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.22922015}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13015-024-00259-1","pdf_url":"https://almob.biomedcentral.com/counter/pdf/10.1186/s13015-024-00259-1","source":{"id":"https://openalex.org/S205663195","display_name":"Algorithms for Molecular Biology","issn_l":"1748-7188","issn":["1748-7188"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310320256"],"host_organization_lineage_names":["Springer Nature","BioMed Central"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10996146","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38581000","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13015-024-00259-1","pdf_url":"https://almob.biomedcentral.com/counter/pdf/10.1186/s13015-024-00259-1","source":{"id":"https://openalex.org/S205663195","display_name":"Algorithms for Molecular Biology","issn_l":"1748-7188","issn":["1748-7188"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310320256"],"host_organization_lineage_names":["Springer Nature","BioMed Central"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.65,"id":"https://metadata.un.org/sdg/4","display_name":"Quality education"}],"grants":[{"funder":"https://openalex.org/F4320310086","funder_display_name":"Helsingin Yliopisto","award_id":null}],"datasets":[],"versions":[],"referenced_works_count":31,"referenced_works":["https://openalex.org/W1777751085","https://openalex.org/W1972418517","https://openalex.org/W2022910024","https://openalex.org/W2057253402","https://openalex.org/W2096128575","https://openalex.org/W2105459732","https://openalex.org/W2111295912","https://openalex.org/W2123845384","https://openalex.org/W2125266506","https://openalex.org/W2133767259","https://openalex.org/W2133956160","https://openalex.org/W2136651963","https://openalex.org/W2151752966","https://openalex.org/W2155845142","https://openalex.org/W2159954944","https://openalex.org/W2597444305","https://openalex.org/W2897927784","https://openalex.org/W2949528162","https://openalex.org/W2963158913","https://openalex.org/W3018162653","https://openalex.org/W3027033297","https://openalex.org/W3085875665","https://openalex.org/W3125399386","https://openalex.org/W3150463527","https://openalex.org/W3178314485","https://openalex.org/W4214573178","https://openalex.org/W4221076494","https://openalex.org/W4225502295","https://openalex.org/W4323366466","https://openalex.org/W4383186537","https://openalex.org/W6247929"],"related_works":["https://openalex.org/W4394360958","https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2382290278","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2073681303","https://openalex.org/W2051487156","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Abstract":[0],"Computing":[1],"k":[2,20,39,48,74,89,112,122,158,181,217,240,307,322,357],"-mer":[3,21,49,182,358],"frequencies":[4],"in":[5,14,260,297],"a":[6,11,41,117,180,184,191,301,316],"collection":[7],"of":[8,72,88,100,111,142,149,179],"reads":[9,106],"is":[10,83,94,146,278],"common":[12],"procedure":[13],"many":[15],"genomic":[16],"applications.":[17],"Several":[18],"state-of-the-art":[19,356],"counters":[22,359],"rely":[23],"on":[24],"hash":[25,42,119,188,228,318],"tables":[26],"to":[27,190,198,214,230,237,304,315,334,365,367],"carry":[28],"out":[29],"this":[30,277],"task":[31],"but":[32,233,273,292],"they":[33,361],"are":[34],"often":[35],"optimised":[36],"for":[37,85,121],"small":[38],"as":[40,98,219,325],"table":[43,120,189,229,319],"keeping":[44,300],"keys":[45,326],"explicitly":[46,324],"(i.e.,":[47],"sequences)":[50],"takes":[51,244],"$$O(N\\frac{k}{w})$$":[52],"":[54,127,164,204,248,266],"":[55,128,165,205,249],"O":[56,129,250],"(":[57,130,251],"N":[58,131],"":[59,134],"k":[60,135,166,206,254],"w":[61,136],"":[62,137],")":[63,138,256],"":[64,139,169,209,257],"":[65,140,170,210,258,268],"computer":[66,79],"words,":[67],"N":[68],"being":[69,269,340],"the":[70,78,147,154,176,187,200,227,239,261,270,281,306],"number":[71,148],"distinct":[73],"-mers":[75,123,159,218,308,323],"and":[76,102,183,309,327],"w":[77],"word":[80],"size,":[81],"which":[82,194],"impractical":[84],"long":[86,101],"values":[87,110],".":[90,113],"This":[91,221],"space":[92,232,295,338],"usage":[93,296],"an":[95],"important":[96],"limitation":[97],"analysis":[99],"accurate":[103],"HiFi":[104],"sequencing":[105],"can":[107,196],"require":[108],"larger":[109],"We":[114,212,312,346],"propose":[115],",":[116],"space-efficient":[118],"using":[124],"$$O(N+u\\frac{k}{w})$$":[125],"+":[132],"u":[133],"words":[141],"space,":[143],"where":[144],"u":[145],"reads.":[150],"Our":[151],"framework":[152],"exploits":[153],"fact":[155],"that":[156,329,349],"consecutive":[157],"overlap":[160],"by":[161],"$$k-1$$":[162,202],"-":[167,207],"1":[168,208],"symbols.":[171,211],"Thus,":[172],"we":[173,195],"only":[174],"store":[175],"last":[177],"symbol":[178],"pointer":[185],"within":[186,226],"previous":[192],"one,":[193],"use":[197],"recover":[199],"remaining":[201],"adapt":[213],"compute":[215],"canonical":[216,284,314,321,350],"well.":[220],"variant":[222,285],"also":[223,347],"uses":[224,332,351],"pointers":[225],"save":[231],"requires":[234],"more":[235],"work":[236],"decode":[238],"-mers.":[241],"Specifically,":[242],"it":[243],"$$O(\\sigma":[245],"^{k})$$":[246],"":[252],"\u03c3":[253,267],"":[255],"time":[259],"worst":[262],"case,":[263],"$$\\sigma$$":[264],"DNA":[271],"alphabet,":[272],"our":[274,289,330],"experiments":[275],"show":[276,328,348],"hardly":[279],"ever":[280],"case.":[282],"The":[283],"does":[286],"not":[287,363],"improve":[288],"theoretical":[290],"results":[291],"greatly":[293],"reduces":[294],"practice":[298],"while":[299,339],"competitive":[302],"performance":[303],"get":[305],"their":[310],"frequencies.":[311],"compare":[313],"regular":[317],"storing":[320],"method":[331],"up":[333],"five":[335],"times":[336,344],"less":[337,341,353],"than":[342,355],"1.5":[343],"slower.":[345],"significantly":[352],"memory":[354],"when":[360],"do":[362],"resort":[364],"disk":[366],"keep":[368],"intermediate":[369],"results.":[370]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4393989706","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2024-12-12T08:27:01.737445","created_date":"2024-04-06"}