{"id":"https://openalex.org/W4294318919","doi":"https://doi.org/10.48550/arxiv.2208.14787","title":"Computing all-vs-all MEMs in run-length encoded collections of HiFi reads","display_name":"Computing all-vs-all MEMs in run-length encoded collections of HiFi reads","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4294318919","doi":"https://doi.org/10.48550/arxiv.2208.14787"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2208.14787","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2208.14787","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039549554","display_name":"Diego D\u00edaz-Dom\u00ednguez","orcid":"https://orcid.org/0000-0002-9071-0254"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"D\u00edaz-Dom\u00ednguez, Diego","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021097696","display_name":"Simon J. Puglisi","orcid":"https://orcid.org/0000-0001-7668-7636"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Puglisi, Simon J.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5091190630","display_name":"Leena Salmela","orcid":"https://orcid.org/0000-0002-0756-543X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Salmela, Leena","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":60},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11567","display_name":"semigroups and automata theory","score":0.9853,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9772,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/substring","display_name":"Substring","score":0.8182447},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5641158}],"concepts":[{"id":"https://openalex.org/C182407805","wikidata":"https://www.wikidata.org/wiki/Q2626534","display_name":"Substring","level":3,"score":0.8182447},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.61484873},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.56575406},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5641158},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.5295416},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.48557442},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.46722138},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.4492204},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.41991672},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.30606163},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.20858258},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.0771687},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2208.14787","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2208.14787","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2208.14787","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4308670360","https://openalex.org/W4291327800","https://openalex.org/W4286787697","https://openalex.org/W4230921427","https://openalex.org/W4200187525","https://openalex.org/W3043769852","https://openalex.org/W2950721324","https://openalex.org/W2051304374","https://openalex.org/W1970688438","https://openalex.org/W1505605100"],"abstract_inverted_index":{"We":[0,94,191],"describe":[1],"an":[2,188],"algorithm":[3,125,151],"to":[4,26,29,81,144,196,228,255],"find":[5,229],"maximal":[6],"exact":[7],"matches":[8,231],"(MEMs)":[9],"among":[10],"HiFi":[11,45,104,115],"reads":[12,46,105],"with":[13,32,48,218],"homopolymer":[14,219],"errors.":[15,33],"The":[16,75],"main":[17],"novelty":[18],"in":[19],"our":[20,142,265],"work":[21],"is":[22,80],"that":[23,96,131,153,204,248],"we":[24,118,205,244],"resort":[25],"run-length":[27,134,209],"compression":[28],"help":[30],"deal":[31],"Our":[34,211],"method":[35,247],"receives":[36],"as":[37,100,221],"input":[38,189],"a":[39,120,164,193,246,251],"run-length-encoded":[40],"string":[41],"collection":[42],"containing":[43],"the":[44,55,62,72,78,83,86,103,114,123,133,137,150,169,198,201,207,214,233,237,240,257,261],"along":[47],"their":[49,91,107,173],"reverse":[50,108],"complements.":[51],"Subsequently,":[52],"it":[53,101,167,222],"splits":[54],"encoding":[56,135],"into":[57],"two":[58,154],"arrays,":[59],"one":[60],"storing":[61,71],"sequence":[63],"of":[64,77,85,122,126,141,200,216,239,260],"symbols":[65,88],"for":[66,113],"equal-symbol":[67,241],"runs":[68],"and":[69,89,106,136,159,180],"another":[70],"run":[73,87],"lengths.":[74],"purpose":[76],"split":[79],"get":[82],"BWT":[84,143],"reorder":[90],"lengths":[92,238],"accordingly.":[93],"show":[95],"this":[97],"special":[98],"BWT,":[99],"encodes":[102],"complements,":[109],"supports":[110],"bi-directional":[111,139],"queries":[112],"reads.":[116],"Then,":[117],"propose":[119],"variation":[121],"MEM":[124,170],"Belazzougui":[127],"et":[128],"al.":[129],"(2013)":[130],"exploits":[132],"implicit":[138],"property":[140],"compute":[145],"approximate":[146,230],"MEMs.":[147],"Concretely,":[148],"if":[149,172],"finds":[152],"substrings,":[155],"$a_1":[156],"\\ldots":[157,161,178,182],"a_p$":[158],"$b_1":[160],"b_p$,":[162],"have":[163],"MEM,":[165],"then":[166],"reports":[168],"only":[171,234],"corresponding":[174],"length":[175,202],"sequences,":[176],"$\\ell^{a}_1":[177],"\\ell^{a}_p$":[179],"$\\ell^{b}_1":[181],"\\ell^{b}_p$,":[183],"do":[184],"not":[185,224],"differ":[186],"beyond":[187],"threshold.":[190],"use":[192],"simple":[194],"metric":[195],"calculate":[197],"similarity":[199],"sequences":[203],"call":[206],"{\\em":[208],"excess}.":[210],"technique":[212],"facilitates":[213],"detection":[215],"MEMs":[217,262],"errors":[220],"does":[223],"require":[225],"dynamic":[226],"programming":[227],"where":[232],"edits":[235],"are":[236],"runs.":[242],"Finally,":[243],"present":[245],"relies":[249],"on":[250],"geometric":[252],"data":[253],"structure":[254],"report":[256],"text":[258],"occurrences":[259],"detected":[263],"by":[264],"algorithm.":[266]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4294318919","counts_by_year":[],"updated_date":"2024-12-13T10:33:05.317459","created_date":"2022-09-02"}