{"id":"https://openalex.org/W1995747683","doi":"https://doi.org/10.1186/1758-2946-5-s1-p53","title":"Deterministic clustering of the available chemical space","display_name":"Deterministic clustering of the available chemical space","publication_year":2013,"publication_date":"2013-03-01","ids":{"openalex":"https://openalex.org/W1995747683","doi":"https://doi.org/10.1186/1758-2946-5-s1-p53","mag":"1995747683","pmcid":"https://www.ncbi.nlm.nih.gov/pmc/articles/3606189"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1186/1758-2946-5-s1-p53","pdf_url":"https://jcheminf.biomedcentral.com/track/pdf/10.1186/1758-2946-5-S1-P53","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310320256"],"host_organization_lineage_names":["Springer Nature","BioMed Central"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://jcheminf.biomedcentral.com/track/pdf/10.1186/1758-2946-5-S1-P53","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082172915","display_name":"Philipp Thiel","orcid":"https://orcid.org/0000-0001-9498-1214"},"institutions":[{"id":"https://openalex.org/I149899117","display_name":"Max Planck Society","ror":"https://ror.org/01hhn8329","country_code":"DE","type":"nonprofit","lineage":["https://openalex.org/I149899117"]},{"id":"https://openalex.org/I8087733","display_name":"University of T\u00fcbingen","ror":"https://ror.org/03a1kwz48","country_code":"DE","type":"education","lineage":["https://openalex.org/I8087733"]},{"id":"https://openalex.org/I4210166506","display_name":"Chemical Genomics Centre","ror":"https://ror.org/05wctbc12","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210166506"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Philipp Thiel","raw_affiliation_strings":["Applied Bioinformatics, Center for Bioinformatics, Quantitative Biology Center and Dept. of Computer Science, University of T\u00fcbingen, T\u00fcbingen, Germany","Chemical Genomics Centre of the Max Planck Society, Dortmund, Germany"],"affiliations":[{"raw_affiliation_string":"Chemical Genomics Centre of the Max Planck Society, Dortmund, Germany","institution_ids":["https://openalex.org/I149899117","https://openalex.org/I4210166506"]},{"raw_affiliation_string":"Applied Bioinformatics, Center for Bioinformatics, Quantitative Biology Center and Dept. of Computer Science, University of T\u00fcbingen, T\u00fcbingen, Germany","institution_ids":["https://openalex.org/I8087733"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048663892","display_name":"Lisa Peltason","orcid":null},"institutions":[{"id":"https://openalex.org/I118019719","display_name":"Roche (Switzerland)","ror":"https://ror.org/00by1q217","country_code":"CH","type":"company","lineage":["https://openalex.org/I118019719"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Lisa Peltason","raw_affiliation_strings":["F. Hoffmann\u2013La Roche AG, Basel, Switzerland"],"affiliations":[{"raw_affiliation_string":"F. Hoffmann\u2013La Roche AG, Basel, Switzerland","institution_ids":["https://openalex.org/I118019719"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012574716","display_name":"Christian Ottmann","orcid":"https://orcid.org/0000-0001-7315-0315"},"institutions":[{"id":"https://openalex.org/I149899117","display_name":"Max Planck Society","ror":"https://ror.org/01hhn8329","country_code":"DE","type":"nonprofit","lineage":["https://openalex.org/I149899117"]},{"id":"https://openalex.org/I4210166506","display_name":"Chemical Genomics Centre","ror":"https://ror.org/05wctbc12","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210166506"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Christian Ottmann","raw_affiliation_strings":["Chemical Genomics Centre of the Max-Planck-Society, Dortmund, Germany"],"affiliations":[{"raw_affiliation_string":"Chemical Genomics Centre of the Max-Planck-Society, Dortmund, Germany","institution_ids":["https://openalex.org/I149899117","https://openalex.org/I4210166506"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053621010","display_name":"Oliver Kohlbacher","orcid":"https://orcid.org/0000-0003-1739-4598"},"institutions":[{"id":"https://openalex.org/I8087733","display_name":"University of T\u00fcbingen","ror":"https://ror.org/03a1kwz48","country_code":"DE","type":"education","lineage":["https://openalex.org/I8087733"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Oliver Kohlbacher","raw_affiliation_strings":["Applied Bioinformatics, Center for Bioinformatics, Quantitative Biology Center and Dept. of Computer Science, University of T\u00fcbingen, T\u00fcbingen, Germany"],"affiliations":[{"raw_affiliation_string":"Applied Bioinformatics, Center for Bioinformatics, Quantitative Biology Center and Dept. of Computer Science, University of T\u00fcbingen, T\u00fcbingen, Germany","institution_ids":["https://openalex.org/I8087733"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1290,"currency":"GBP","value_usd":1582,"provenance":"doaj"},"apc_paid":{"value":1290,"currency":"GBP","value_usd":1582,"provenance":"doaj"},"fwci":0.0,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":65},"biblio":{"volume":"5","issue":"S1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12748","display_name":"Molecular spectroscopy and chirality","score":0.998,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10836","display_name":"Metabolomics and Mass Spectrometry Studies","score":0.993,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/chemical-space","display_name":"Chemical space","score":0.5242767}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.64180654},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.62791026},{"id":"https://openalex.org/C99726746","wikidata":"https://www.wikidata.org/wiki/Q906396","display_name":"Chemical space","level":3,"score":0.5242767},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.51260537},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.43978894},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.429263},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21034083},{"id":"https://openalex.org/C74187038","wikidata":"https://www.wikidata.org/wiki/Q1418791","display_name":"Drug discovery","level":2,"score":0.19248375},{"id":"https://openalex.org/C60644358","wikidata":"https://www.wikidata.org/wiki/Q128570","display_name":"Bioinformatics","level":1,"score":0.19242713},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.08559561},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1186/1758-2946-5-s1-p53","pdf_url":"https://jcheminf.biomedcentral.com/track/pdf/10.1186/1758-2946-5-S1-P53","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310320256"],"host_organization_lineage_names":["Springer Nature","BioMed Central"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3606189","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1186/1758-2946-5-s1-p53","pdf_url":"https://jcheminf.biomedcentral.com/track/pdf/10.1186/1758-2946-5-S1-P53","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310320256"],"host_organization_lineage_names":["Springer Nature","BioMed Central"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.53}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":5,"referenced_works":["https://openalex.org/W2000376564","https://openalex.org/W2027482274","https://openalex.org/W2045862148","https://openalex.org/W2161834869","https://openalex.org/W2177658341"],"related_works":["https://openalex.org/W4281772408","https://openalex.org/W4254684673","https://openalex.org/W4226062292","https://openalex.org/W4212982662","https://openalex.org/W3191713309","https://openalex.org/W2902857455","https://openalex.org/W2161782568","https://openalex.org/W2160244398","https://openalex.org/W2078293348","https://openalex.org/W2023175890"],"abstract_inverted_index":{"Clustering":[0],"of":[1,44,95,117,190,316],"compound":[2,135,141],"libraries":[3,100,114,136,304],"using":[4,61,110,122,344],"2D":[5],"binary":[6],"fingerprints":[7,193],"is":[8,47,107,270],"a":[9,92,129,179,188,216,221,254,260,289,341],"fundamental":[10],"task":[11],"in":[12],"chemoinformatics":[13],"and":[14,32,66,83,151,201,244,277,347],"various":[15],"methods":[16,25,53,276],"have":[17,76,159],"been":[18,77],"described":[19],"to":[20,56,63,79,97,103,161,214,225,229,246,272,281,299,302,352],"solve":[21],"it":[22],"[1].":[23],"These":[24],"can":[26,278],"roughly":[27],"be":[28,162,230],"grouped":[29],"into":[30,232],"deterministic":[31,45,69,111,130],"non-deterministic":[33,52,88],"approaches":[34,46,89],"with":[35,101,181],"two":[36],"key-characteristics":[37],"distinguishing":[38],"them.":[39],"First,":[40,185],"the":[41,51,182,227,233,239,247,314,322],"algorithmic":[42],"complexity":[43],"more":[48],"demanding":[49],"whereas":[50,113],"often":[54,84],"try":[55],"overcome":[57],"this":[58,144,174,236],"drawback":[59],"by":[60,194,219],"heuristics":[62,123],"save":[64],"time":[65],"memory.":[67],"Second,":[68,208],"clustering":[70,94,133,318,335],"algorithms,":[71],"especially":[72],"agglomerative":[73],"hierarchical":[74],"techniques":[75,112],"shown":[78],"yield":[80],"good":[81],"results":[82],"perform":[85,279],"better":[86],"than":[87],"[2].":[90],"As":[91],"consequence,":[93],"small":[96],"medium":[98,301],"sized":[99],"up":[102,280],"1":[104],"million":[105,283,330],"compounds":[106,118,331],"regularly":[108],"performed":[109],"comprising":[115,327],"millions":[116],"are":[119,198,242,257],"mostly":[120],"clustered":[121],"like":[124],"k-means":[125],"[3].\r\n\r\nHere,":[126],"we":[127,146,158,177,186,209,262,320],"present":[128],"approach":[131],"for":[132,154,169,267],"huge":[134],"based":[137],"on":[138,288,305,340],"all":[139,211],"pairwise":[140,212],"similarities.":[142],"For":[143],"purpose,":[145],"use":[147],"an":[148],"extremely":[149],"fast":[150],"flexible":[152],"algorithm":[153,266],"similarity":[155,175,217,237,268],"calculations,":[156],"which":[157],"developed":[160],"purely":[163],"CPU-based":[164,275],"thus":[165],"having":[166],"no":[167],"need":[168],"any":[170],"specialized":[171],"hardware.":[172],"Using":[173],"method,":[176],"implemented":[178],"workflow":[180,298,336],"following":[183],"steps.":[184],"create":[187],"set":[189],"unique":[191],"input":[192],"filtering":[195],"duplicates":[196],"that":[197,264],"then":[199],"stored":[200],"finally":[202,312],"remapped":[203],"onto":[204],"their":[205],"representative":[206],"clusters.":[207],"calculate":[210],"similarities":[213],"construct":[215],"network":[218,238],"applying":[220],"fixed":[222],"Tanimoto":[223,284],"threshold":[224],"select":[226],"edges":[228],"inserted":[231],"network.":[234],"From":[235],"connected":[240,251],"subgraphs":[241,252],"extracted":[243],"forwarded":[245],"last":[248],"step.":[249],"Finally,":[250],"exceeding":[253],"predefined":[255],"size":[256],"hierarchically":[258],"clustered.\r\n\r\nAs":[259],"result,":[261],"show":[263],"our":[265,297,317],"calculation":[269],"competitive":[271],"recently":[273],"published":[274],"380":[282],"calculations":[285],"per":[286],"second":[287],"current":[290,306],"desktop":[291,307],"computer.":[292],"This":[293],"efficient":[294],"method":[295],"allows":[296],"process":[300],"large":[303],"computers":[308],"within":[309],"minutes.":[310],"To":[311],"demonstrate":[313],"power":[315],"workflow,":[319],"processed":[321],"commercially":[323],"available":[324],"chemical":[325],"space":[326],"about":[328],"17":[329],"[4].":[332],"The":[333],"entire":[334],"took":[337],"63":[338],"hours":[339],"compute":[342],"server":[343],"64":[345],"cores":[346],"100":[348],"GB":[349],"main":[350],"memory":[351],"complete.":[353]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W1995747683","counts_by_year":[],"updated_date":"2024-12-13T09:07:12.885766","created_date":"2016-06-24"}