{"id":"https://openalex.org/W2266335005","doi":"https://doi.org/10.1186/1471-2105-16-s17-s7","title":"In search of perfect reads","display_name":"In search of perfect reads","publication_year":2015,"publication_date":"2015-12-01","ids":{"openalex":"https://openalex.org/W2266335005","doi":"https://doi.org/10.1186/1471-2105-16-s17-s7","mag":"2266335005","pmid":"https://pubmed.ncbi.nlm.nih.gov/26679555","pmcid":"https://www.ncbi.nlm.nih.gov/pmc/articles/4674851"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-16-s17-s7","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-16-S17-S7","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310320256"],"host_organization_lineage_names":["Springer Nature","BioMed Central"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-16-S17-S7","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086885083","display_name":"Soumitra Pal","orcid":"https://orcid.org/0000-0003-4840-3944"},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Soumitra Pal","raw_affiliation_strings":["Department of Computer Science and Engineering, Indian Institute of Technology Bombay, Powai, 400076, Mumbai, India"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Indian Institute of Technology Bombay, Powai, 400076, Mumbai, India","institution_ids":["https://openalex.org/I162827531"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074347913","display_name":"Srinivas Aluru","orcid":"https://orcid.org/0000-0003-4279-469X"},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Srinivas Aluru","raw_affiliation_strings":["Department of Computer Science and Engineering, Indian Institute of Technology Bombay, Powai, 400076, Mumbai, India"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Indian Institute of Technology Bombay, Powai, 400076, Mumbai, India","institution_ids":["https://openalex.org/I162827531"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072,"provenance":"doaj"},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072,"provenance":"doaj"},"fwci":0.379,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":4,"citation_normalized_percentile":{"value":0.465,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":79,"max":81},"biblio":{"volume":"16","issue":"S17","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"RNA Sequencing Data Analysis","score":0.9983,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"RNA Sequencing Data Analysis","score":0.9983,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Distributed Storage Systems and Network Coding","score":0.9928,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11407","display_name":"Droplet Microfluidics Technology","score":0.975,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sequence-alignment","display_name":"sequence alignment","score":0.5495},{"id":"https://openalex.org/keywords/high-throughput-screening","display_name":"High-Throughput Screening","score":0.54645},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5425033},{"id":"https://openalex.org/keywords/erasure-coding","display_name":"Erasure Coding","score":0.508122},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.487133}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.73354447},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.5500813},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5425033},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.51057845},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.5071616},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.487133},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4061108},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.15031523},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.11069116},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":"","qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D030561","descriptor_name":"Databases, Nucleic Acid","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"","qualifier_name":null,"is_major_topic":false}],"locations_count":4,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-16-s17-s7","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-16-S17-S7","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310320256"],"host_organization_lineage_names":["Springer Nature","BioMed Central"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://europepmc.org/articles/pmc4674851","pdf_url":"https://europepmc.org/articles/pmc4674851?pdf=render","source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":["European Bioinformatics Institute"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4674851","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/26679555","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-16-s17-s7","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-16-S17-S7","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310320256"],"host_organization_lineage_names":["Springer Nature","BioMed Central"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.44,"display_name":"Industry, innovation and infrastructure"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":16,"referenced_works":["https://openalex.org/W142724965","https://openalex.org/W1980032728","https://openalex.org/W1980667059","https://openalex.org/W2014099509","https://openalex.org/W2034937344","https://openalex.org/W2096128575","https://openalex.org/W2101250487","https://openalex.org/W2103441770","https://openalex.org/W2119745866","https://openalex.org/W2120076861","https://openalex.org/W2122812051","https://openalex.org/W2127087491","https://openalex.org/W2142702487","https://openalex.org/W2152246890","https://openalex.org/W2160265768","https://openalex.org/W2160665949"],"related_works":["https://openalex.org/W330130819","https://openalex.org/W3121416282","https://openalex.org/W2760721665","https://openalex.org/W2288610023","https://openalex.org/W2281389338","https://openalex.org/W2136583354","https://openalex.org/W2112044895","https://openalex.org/W2111238207","https://openalex.org/W2037453743","https://openalex.org/W168676510"],"abstract_inverted_index":{"Continued":[0],"advances":[1],"in":[2,27,45,85,159,236,334],"next":[3],"generation":[4],"short-read":[5],"sequencing":[6,283],"technologies":[7],"are":[8,75,81,94,162],"increasing":[9],"throughput":[10],"and":[11,88,115,130,241,277,314],"read":[12],"lengths,":[13],"while":[14],"driving":[15],"down":[16],"error":[17,31,65,225,302,326,341],"rates.":[18],"Taking":[19],"advantage":[20],"of":[21,67,92,109,141,156,170,213,279,340],"the":[22,72,79,106,145,157,189,197,206,211,234,257,260,271,275],"high":[23,46],"coverage":[24,47,191,204,276],"sampling":[25],"used":[26],"many":[28],"applications,":[29],"several":[30,139],"correction":[32,226,303,327],"algorithms":[33,343],"have":[34,285],"been":[35],"developed":[36],"to":[37,59,98,121,128,239,249,255,267,270,312,324],"improve":[38],"data":[39,49],"quality":[40],"further.":[41],"However,":[42],"correcting":[43,342],"errors":[44,80],"sequence":[48,62],"requires":[50],"significant":[51],"computing":[52],"resources.":[53],"We":[54,111,209],"propose":[55,313],"a":[56,89,113,160,203,237,309,330,337,345],"different":[57],"approach":[58,215],"handle":[60],"erroneous":[61,133,184,243],"data.":[63],"Presently,":[64],"rates":[66],"high-throughput":[68],"platforms":[69,284],"such":[70,100],"as":[71,183,201,205,329,344],"Illumina":[73],"HiSeq":[74],"within":[76],"1%.":[77],"Moreover,":[78,319],"not":[82],"uniformly":[83],"distributed":[84],"all":[86],"reads,":[87,259],"large":[90],"percentage":[91],"reads":[93,102,134,158,181,199,235,247,280,297],"indeed":[95],"error-free.":[96],"Ability":[97],"predict":[99],"perfect":[101,251],"can":[103,135,292],"significantly":[104],"impact":[105],"run-time":[107],"complexity":[108],"applications.":[110],"present":[112],"simple":[114],"fast":[116],"k-spectrum":[117],"analysis":[118],"based":[119],"method":[120,166,187,254],"identify":[122,129,240],"error-free":[123],"reads.":[124],"The":[125],"filtration":[126],"process":[127],"weed":[131],"out":[132,180,296],"be":[136,250],"customized":[137],"at":[138],"levels":[140],"stringency":[142],"depending":[143],"upon":[144],"downstream":[146,219,331],"application":[147],"need.":[148],"Our":[149,306],"experiments":[150],"show":[151,222],"that":[152,223],"if":[153],"around":[154],"80%":[155],"dataset":[161,238],"perfect,":[163],"then":[164],"our":[165,186,214,253,320],"retains":[167],"almost":[168],"99.9%":[169],"them":[171],"with":[172,298],"more":[173],"than":[174],"90%":[175],"precision":[176],"rate.":[177],"Though":[178],"filtering":[179,295],"identified":[182],"by":[185,192,252,265,346],"reduces":[188],"average":[190],"about":[193],"7%,":[194],"we":[195,221,291],"found":[196],"remaining":[198],"provide":[200],"uniform":[202],"original":[207],"dataset.":[208],"demonstrate":[210,315],"effectiveness":[212],"on":[216,231],"an":[217,224,288],"example":[218],"application:":[220],"algorithm,":[227],"Reptile,":[228],"which":[229],"rely":[230],"collectively":[232],"analyzing":[233],"correct":[242,256],"bases,":[244],"instead":[245],"use":[246],"predicted":[248],"other":[258],"overall":[261],"accuracy":[262,278],"improves":[263],"further":[264],"up":[266],"10%.":[268],"Thanks":[269],"continuous":[272],"technological":[273],"improvements,":[274],"from":[281],"dominant":[282],"now":[286],"reached":[287],"extent":[289],"where":[290],"envision":[293],"just":[294],"errors,":[299],"thus":[300],"making":[301],"less":[304],"important.":[305],"algorithm":[307,328],"is":[308,322],"first":[310],"attempt":[311],"this":[316,333],"new":[317,338],"paradigm.":[318],"demonstration":[321],"applicable":[323],"any":[325],"application,":[332],"turn":[335],"gives":[336],"class":[339],"product.":[347]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2266335005","counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":3}],"updated_date":"2024-11-27T16:59:29.703005","created_date":"2016-06-24"}