{"id":"https://openalex.org/W4389542870","doi":"https://doi.org/10.1109/embc40787.2023.10341090","title":"Kmer-Node2Vec: a Fast and Efficient Method for Kmer Embedding from the Kmer Co-occurrence Graph, with Applications to DNA Sequences","display_name":"Kmer-Node2Vec: a Fast and Efficient Method for Kmer Embedding from the Kmer Co-occurrence Graph, with Applications to DNA Sequences","publication_year":2023,"publication_date":"2023-07-24","ids":{"openalex":"https://openalex.org/W4389542870","doi":"https://doi.org/10.1109/embc40787.2023.10341090","pmid":"https://pubmed.ncbi.nlm.nih.gov/38083774"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/embc40787.2023.10341090","pdf_url":null,"source":{"id":"https://openalex.org/S4363607706","display_name":"2022 44th Annual International Conference of the IEEE Engineering in Medicine & Biology Society (EMBC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069100479","display_name":"Zhaochong Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaochong Yu","raw_affiliation_strings":["Department of Big Data and Internet, Shenzhen Technology University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Department of Big Data and Internet, Shenzhen Technology University, Shenzhen, China","institution_ids":["https://openalex.org/I4210152380"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075874959","display_name":"Zihang Yang","orcid":"https://orcid.org/0000-0003-4982-2710"},"institutions":[{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zihang Yang","raw_affiliation_strings":["Department of Big Data and Internet, Shenzhen Technology University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Department of Big Data and Internet, Shenzhen Technology University, Shenzhen, China","institution_ids":["https://openalex.org/I4210152380"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063996880","display_name":"Qingyang Lan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingyang Lan","raw_affiliation_strings":["Department of Big Data and Internet, Shenzhen Technology University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Department of Big Data and Internet, Shenzhen Technology University, Shenzhen, China","institution_ids":["https://openalex.org/I4210152380"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011810690","display_name":"Yuchuan Wang","orcid":"https://orcid.org/0000-0003-4429-7099"},"institutions":[{"id":"https://openalex.org/I137506752","display_name":"North China University of Science and Technology","ror":"https://ror.org/04z4wmb81","country_code":"CN","type":"education","lineage":["https://openalex.org/I137506752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuchuan Wang","raw_affiliation_strings":["North China University of Science and Technology, Tangshan, China"],"affiliations":[{"raw_affiliation_string":"North China University of Science and Technology, Tangshan, China","institution_ids":["https://openalex.org/I137506752"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102135480","display_name":"Feijuan Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164332","display_name":"Shenzhen Second People's Hospital","ror":"https://ror.org/05c74bq69","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210164332"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feijuan Huang","raw_affiliation_strings":["Shenzhen Second People’s Hospital,Shenzhen Institute of Translational Medicine,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Second People’s Hospital,Shenzhen Institute of Translational Medicine,Shenzhen,China","institution_ids":["https://openalex.org/I4210164332"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036042523","display_name":"Yuanzhe Cai","orcid":null},"institutions":[{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanzhe Cai","raw_affiliation_strings":["Department of Big Data and Internet, Shenzhen Technology University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Department of Big Data and Internet, Shenzhen Technology University, Shenzhen, China","institution_ids":["https://openalex.org/I4210152380"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":67},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10222","display_name":"Genomics and Chromatin Dynamics","score":0.9985,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9978,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/graph-embedding","display_name":"Graph Embedding","score":0.5304911},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.52463156}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.70360374},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.62198114},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.534557},{"id":"https://openalex.org/C75564084","wikidata":"https://www.wikidata.org/wiki/Q5597085","display_name":"Graph embedding","level":3,"score":0.5304911},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.52463156},{"id":"https://openalex.org/C121194460","wikidata":"https://www.wikidata.org/wiki/Q856741","display_name":"Random walk","level":2,"score":0.4299779},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.40684357},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38681775},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35460436},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3243351},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.23366982},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.10965091},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.075855374},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"","qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D004247","descriptor_name":"DNA","qualifier_ui":"","qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001483","descriptor_name":"Base Sequence","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D004247","descriptor_name":"DNA","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"","qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/embc40787.2023.10341090","pdf_url":null,"source":{"id":"https://openalex.org/S4363607706","display_name":"2022 44th Annual International Conference of the IEEE Engineering in Medicine & Biology Society (EMBC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38083774","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":16,"referenced_works":["https://openalex.org/W1614298861","https://openalex.org/W2041391522","https://openalex.org/W2142678478","https://openalex.org/W2582377823","https://openalex.org/W2962756421","https://openalex.org/W2963367647","https://openalex.org/W2963469388","https://openalex.org/W2985711816","https://openalex.org/W2998702515","https://openalex.org/W3092136311","https://openalex.org/W3116118967","https://openalex.org/W3136521802","https://openalex.org/W3176543115","https://openalex.org/W4225438928","https://openalex.org/W4293932561","https://openalex.org/W4306795304"],"related_works":["https://openalex.org/W3206528106","https://openalex.org/W3038102983","https://openalex.org/W3036264823","https://openalex.org/W2966686650","https://openalex.org/W2950907416","https://openalex.org/W2912814903","https://openalex.org/W2123605750","https://openalex.org/W2088740331","https://openalex.org/W2082479932","https://openalex.org/W1559483280"],"abstract_inverted_index":{"Learning":[0],"low-dimensional":[1],"continuous":[2],"vector":[3],"representation":[4],"for":[5,41,123],"short":[6],"k-mers":[7],"divided":[8],"from":[9],"long":[10,57],"DNA":[11,16,29,42,86],"sequences":[12],"is":[13,35,116],"key":[14],"to":[15,49,54,77,103],"sequence":[17,30,43,141],"modeling":[18],"that":[19,113],"can":[20],"be":[21],"utilized":[22],"in":[23,60,135],"many":[24],"bioinformatics":[25],"investigations,":[26],"such":[27],"as":[28],"retrieval":[31,142],"and":[32,93,106,130,143],"classification.":[33,144],"DNA2Vec":[34,119,134],"the":[36,84,98],"most":[37],"widely":[38],"used":[39],"method":[40,82,115],"embedding.":[44,62,109],"However,":[45],"it":[46],"poorly":[47],"scales":[48],"large":[50,85],"data":[51,128],"sets":[52],"due":[53],"its":[55],"extremely":[56],"training":[58,124],"time":[59],"kmer":[61,72,90,95,108],"In":[63],"this":[64,79],"paper,":[65],"we":[66],"propose":[67],"a":[68,126],"novel":[69],"efficient":[70],"graph-based":[71],"embedding":[73],"method,":[74],"named":[75],"Kmer-Node2Vec,":[76],"tackle":[78],"concern.":[80],"Our":[81],"converts":[83],"corpus":[87],"into":[88],"one":[89],"co-occurrence":[91],"graph,":[92],"extracts":[94],"relation":[96],"on":[97,125,131],"graph":[99],"by":[100,120],"random":[101],"walks":[102],"learn":[104],"fast":[105],"high-quality":[107],"Extensive":[110],"experiments":[111],"show":[112],"our":[114],"faster":[117],"than":[118],"29":[121],"times":[122],"4GB":[127],"set,":[129],"par":[132],"with":[133],"terms":[136],"of":[137,140],"task-specific":[138],"accuracy":[139]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4389542870","counts_by_year":[],"updated_date":"2025-01-07T07:29:28.898135","created_date":"2023-12-12"}