{"id":"https://openalex.org/W4391800639","doi":"https://doi.org/10.48550/arxiv.2402.06935","title":"Taxonomic classification with maximal exact matches in KATKA kernels and\n minimizer digests","display_name":"Taxonomic classification with maximal exact matches in KATKA kernels and\n minimizer digests","publication_year":2024,"publication_date":"2024-02-10","ids":{"openalex":"https://openalex.org/W4391800639","doi":"https://doi.org/10.48550/arxiv.2402.06935"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2402.06935","pdf_url":"http://arxiv.org/pdf/2402.06935","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2402.06935","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093925769","display_name":"Dominika Draesslerov\u00e1","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Draesslerov\u00e1, Dominika","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062095386","display_name":"Omar Ahmed","orcid":"https://orcid.org/0000-0002-9933-8508"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ahmed, Omar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013172801","display_name":"Travis Gagie","orcid":"https://orcid.org/0000-0003-3689-327X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gagie, Travis","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025928389","display_name":"Jan Holub","orcid":"https://orcid.org/0000-0003-3350-534X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Holub, Jan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009556658","display_name":"Ben Langmead","orcid":"https://orcid.org/0000-0003-2437-1976"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Langmead, Ben","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049381844","display_name":"Giovanni Manzini","orcid":"https://orcid.org/0000-0002-5047-0196"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Manzini, Giovanni","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5080743153","display_name":"Gonzalo Navarro","orcid":"https://orcid.org/0000-0002-2286-741X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Navarro, Gonzalo","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.7737,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.7737,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10820","display_name":"Fuzzy Logic and Control Systems","score":0.7144,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.6439,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.66068846},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.42251953},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.32086557}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2402.06935","pdf_url":"http://arxiv.org/pdf/2402.06935","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2402.06935","pdf_url":"http://arxiv.org/pdf/2402.06935","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4245490552","https://openalex.org/W4238204885","https://openalex.org/W3002753104","https://openalex.org/W2142036596","https://openalex.org/W2077600819","https://openalex.org/W2061531152","https://openalex.org/W2042127053","https://openalex.org/W2007980826","https://openalex.org/W1979597421","https://openalex.org/W1587224694"],"abstract_inverted_index":{"For":[0,61,235],"taxonomic":[1],"classification,":[2],"we":[3,21,63,151,214,239],"are":[4,173],"asked":[5],"to":[6,29,58,157],"index":[7],"the":[8,31,70,71,74,87,90,94,105,115,121,124,136,140,143,154,163,176,224,247],"genomes":[9,72,122,141],"in":[10,73,77,83,89,101,111,142,175,223],"a":[11,18,25,84,166,185,188,191,195,199],"phylogenetic":[12],"tree":[13,75,144],"such":[14,42],"that":[15,35,50,98,112,172],"later,":[16],"given":[17],"DNA":[19],"read,":[20,85],"can":[22,56,64],"quickly":[23],"choose":[24],"small":[26],"subtree":[27],"likely":[28],"contain":[30],"genome":[32],"from":[33,226],"which":[34,169,227],"read":[36],"was":[37],"drawn.":[38],"Although":[39],"popular":[40],"classifiers":[41],"as":[43],"Kraken":[44],"use":[45],"$k$-mers,":[46],"recent":[47],"research":[48],"indicates":[49],"using":[51],"maximal":[52],"exact":[53],"matches":[54],"(MEMs)":[55],"lead":[57],"better":[59],"classifications.":[60],"example,":[62],"build":[65],"an":[66],"augmented":[67],"FM-index":[68],"over":[69],"concatenated":[76],"left-to-right":[78],"order;":[79],"for":[80,184],"each":[81],"MEM":[82],"find":[86,104],"interval":[88],"suffix":[91],"array":[92],"containing":[93,123],"starting":[95],"positions":[96],"of":[97,120,139,162,181,194,206],"MEM's":[99],"occurrences":[100],"those":[102,127,228],"genomes;":[103],"minimum":[106],"and":[107,202,210],"maximum":[108],"values":[109],"stored":[110],"interval;":[113],"take":[114],"lowest":[116],"common":[117],"ancestor":[118],"(LCA)":[119],"characters":[125,171],"at":[126],"positions.":[128],"This":[129],"solution":[130,156],"is":[131,145],"practical,":[132],"however,":[133],"only":[134,222,244],"when":[135],"total":[137],"size":[138],"fairly":[146],"small.":[147],"In":[148],"this":[149],"paper":[150],"consider":[152],"applying":[153],"same":[155],"three":[158,204],"lossily":[159],"compressed":[160],"representations":[161,205],"genomes'":[164],"concatenation:":[165],"KATKA":[167,192],"kernel,":[168],"discards":[170],"not":[174],"first":[177],"or":[178],"last":[179],"occurrence":[180],"any":[182],"$k_{\\max}$-tuple,":[183],"parameter":[186,212,237],"$k_{\\max}$;":[187],"minimizer":[189,196],"digest;":[190],"kernel":[193],"digest.":[197],"With":[198],"test":[200],"dataset":[201],"these":[203],"it,":[207],"simulated":[208],"reads":[209,229],"various":[211],"settings,":[213],"checked":[215],"how":[216],"many":[217],"reads'":[218],"longest":[219],"MEMs":[220],"occurred":[221],"sequences":[225],"were":[230],"generated":[231],"(``true":[232],"positive''":[233],"reads).":[234],"some":[236],"settings":[238],"achieved":[240],"significant":[241],"compression":[242],"while":[243],"slightly":[245],"decreasing":[246],"true-positive":[248],"rate.":[249]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4391800639","counts_by_year":[],"updated_date":"2024-12-14T00:26:41.199754","created_date":"2024-02-14"}