{"id":"https://openalex.org/W3010772070","doi":"https://doi.org/10.1089/cmb.2019.0299","title":"De Novo Clustering of Long-Read Transcriptome Data Using a Greedy, Quality Value-Based Algorithm","display_name":"De Novo Clustering of Long-Read Transcriptome Data Using a Greedy, Quality Value-Based Algorithm","publication_year":2020,"publication_date":"2020-03-17","ids":{"openalex":"https://openalex.org/W3010772070","doi":"https://doi.org/10.1089/cmb.2019.0299","mag":"3010772070","pmid":"https://pubmed.ncbi.nlm.nih.gov/32181688"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1089/cmb.2019.0299","pdf_url":null,"source":{"id":"https://openalex.org/S78571599","display_name":"Journal of Computational Biology","issn_l":"1066-5277","issn":["1066-5277","1557-8666"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310320443","host_organization_name":"Mary Ann Liebert, Inc.","host_organization_lineage":["https://openalex.org/P4310320443"],"host_organization_lineage_names":["Mary Ann Liebert, Inc."],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8884114","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082939519","display_name":"Kristoffer Sahlin","orcid":"https://orcid.org/0000-0001-7378-2320"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"funder","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kristoffer Sahlin","raw_affiliation_strings":["Department of Computer Science and Engineering and Pennsylvania State University, University Park, Pennsylvania."],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering and Pennsylvania State University, University Park, Pennsylvania.","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066909672","display_name":"Paul Medvedev","orcid":"https://orcid.org/0000-0003-3143-594X"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"funder","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Paul Medvedev","raw_affiliation_strings":["Center for Computational Biology and Bioinformatics, Pennsylvania State University, University Park, Pennsylvania.","Department of Biochemistry and Molecular Biology, Pennsylvania State University, University Park, Pennsylvania.","Department of Computer Science and Engineering and Pennsylvania State University, University Park, Pennsylvania."],"affiliations":[{"raw_affiliation_string":"Department of Biochemistry and Molecular Biology, Pennsylvania State University, University Park, Pennsylvania.","institution_ids":["https://openalex.org/I130769515"]},{"raw_affiliation_string":"Center for Computational Biology and Bioinformatics, Pennsylvania State University, University Park, Pennsylvania.","institution_ids":["https://openalex.org/I130769515"]},{"raw_affiliation_string":"Department of Computer Science and Engineering and Pennsylvania State University, University Park, Pennsylvania.","institution_ids":["https://openalex.org/I130769515"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5082939519"],"corresponding_institution_ids":["https://openalex.org/I130769515"],"apc_list":null,"apc_paid":null,"fwci":4.739,"has_fulltext":false,"cited_by_count":77,"citation_normalized_percentile":{"value":0.999984,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"27","issue":"4","first_page":"472","last_page":"484"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11791","display_name":"Microbial Community Ecology and Physiology","score":0.9956,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9928,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.76389647},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.7215936},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6916617},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6819498},{"id":"https://openalex.org/C126513998","wikidata":"https://www.wikidata.org/wiki/Q8973627","display_name":"Nanopore sequencing","level":4,"score":0.54364675},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5057974},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.50361747},{"id":"https://openalex.org/C51823790","wikidata":"https://www.wikidata.org/wiki/Q504353","display_name":"Greedy algorithm","level":2,"score":0.4963413},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.22979167},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.2252909},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.20939392},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.11113623},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.10281634},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.09757638},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"","qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D020869","descriptor_name":"Gene Expression Profiling","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D059467","descriptor_name":"Transcriptome","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D020869","descriptor_name":"Gene Expression Profiling","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D059467","descriptor_name":"Transcriptome","qualifier_ui":"","qualifier_name":null,"is_major_topic":false}],"locations_count":4,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1089/cmb.2019.0299","pdf_url":null,"source":{"id":"https://openalex.org/S78571599","display_name":"Journal of Computational Biology","issn_l":"1066-5277","issn":["1066-5277","1557-8666"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310320443","host_organization_name":"Mary Ann Liebert, Inc.","host_organization_lineage":["https://openalex.org/P4310320443"],"host_organization_lineage_names":["Mary Ann Liebert, Inc."],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8884114","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://doi.org/10.1101/463463","pdf_url":null,"source":{"id":"https://openalex.org/S4306402567","display_name":"bioRxiv (Cold Spring Harbor Laboratory)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I2750212522","host_organization_name":"Cold Spring Harbor Laboratory","host_organization_lineage":["https://openalex.org/I2750212522"],"host_organization_lineage_names":["Cold Spring Harbor Laboratory"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/32181688","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8884114","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Life below water","id":"https://metadata.un.org/sdg/14","score":0.76}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":46,"referenced_works":["https://openalex.org/W1987820419","https://openalex.org/W2024072609","https://openalex.org/W2041126213","https://openalex.org/W2070455968","https://openalex.org/W2105765411","https://openalex.org/W2110734043","https://openalex.org/W2116699248","https://openalex.org/W2116913753","https://openalex.org/W2120096459","https://openalex.org/W2124351063","https://openalex.org/W2138615112","https://openalex.org/W2142167651","https://openalex.org/W2144560237","https://openalex.org/W2145853890","https://openalex.org/W2149616469","https://openalex.org/W2150483253","https://openalex.org/W2156125289","https://openalex.org/W2164883482","https://openalex.org/W2170747616","https://openalex.org/W2260037449","https://openalex.org/W2415697913","https://openalex.org/W2597262405","https://openalex.org/W2602978558","https://openalex.org/W2608509736","https://openalex.org/W2617273082","https://openalex.org/W2688344404","https://openalex.org/W2747802896","https://openalex.org/W2751702521","https://openalex.org/W2751810028","https://openalex.org/W2789843538","https://openalex.org/W2802684988","https://openalex.org/W2805146307","https://openalex.org/W2893222042","https://openalex.org/W2898449026","https://openalex.org/W2899345312","https://openalex.org/W2899681763","https://openalex.org/W2950150251","https://openalex.org/W2950214184","https://openalex.org/W2950589160","https://openalex.org/W2950954328","https://openalex.org/W2952080813","https://openalex.org/W2953008890","https://openalex.org/W2953020124","https://openalex.org/W3010772070","https://openalex.org/W4235169531","https://openalex.org/W958219903"],"related_works":["https://openalex.org/W4387497383","https://openalex.org/W4321606653","https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2948807893","https://openalex.org/W2778153218","https://openalex.org/W2758277628","https://openalex.org/W2011430815","https://openalex.org/W1531601525"],"abstract_inverted_index":{"Long-read":[0],"sequencing":[1],"of":[2,21,43,53,67,87,109,130],"transcripts":[3],"with":[4],"Pacific":[5],"Biosciences":[6],"(PacBio)":[7],"Iso-Seq":[8],"and":[9,55,84,101,112],"Oxford":[10],"Nanopore":[11],"Technologies":[12],"has":[13],"proven":[14],"to":[15,18,63,135],"be":[16],"central":[17],"the":[19,41,51],"study":[20],"complex":[22],"isoform":[23],"landscapes":[24],"in":[25,128],"many":[26],"organisms.":[27],"However,":[28],"current":[29],"de":[30],"novo":[31],"transcript":[32],"reconstruction":[33],"algorithms":[34,57],"from":[35],"long-read":[36],"data":[37,104,137],"are":[38],"limited,":[39],"leaving":[40],"potential":[42],"these":[44],"technologies":[45],"unfulfilled.":[46],"A":[47],"common":[48],"bottleneck":[49],"is":[50,80,120],"dearth":[52],"scalable":[54],"accurate":[56],"for":[58],"clustering":[59,77],"long":[60],"reads":[61],"according":[62],"their":[64],"gene":[65],"family":[66],"origin.":[68],"To":[69],"address":[70],"this":[71],"challenge,":[72],"we":[73],"develop":[74],"isONclust,":[75],"a":[76,107,121],"algorithm":[78],"that":[79,118],"greedy":[81],"(to":[82,90],"scale)":[83],"makes":[85],"use":[86],"quality":[88],"values":[89],"handle":[91],"variable":[92],"error":[93],"rates).":[94],"We":[95],"test":[96],"isONclust":[97,119],"on":[98],"three":[99],"simulated":[100],"five":[102],"biological":[103],"sets,":[105],"across":[106],"breadth":[108],"organisms,":[110],"technologies,":[111],"read":[113],"depths.":[114],"Our":[115],"results":[116],"demonstrate":[117],"substantial":[122],"improvement":[123],"over":[124],"previous":[125],"approaches,":[126],"both":[127],"terms":[129],"overall":[131],"accuracy":[132],"and/or":[133],"scalability":[134],"large":[136],"sets.":[138]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3010772070","counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":12},{"year":2023,"cited_by_count":18},{"year":2022,"cited_by_count":18},{"year":2021,"cited_by_count":16},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":4}],"updated_date":"2025-03-18T00:55:01.641149","created_date":"2020-03-23"}