{"id":"https://openalex.org/W4387740953","doi":"https://doi.org/10.1186/s13321-023-00752-6","title":"Prediction of organic compound aqueous solubility using machine learning: a comparison study of descriptor-based and fingerprints-based models","display_name":"Prediction of organic compound aqueous solubility using machine learning: a comparison study of descriptor-based and fingerprints-based models","publication_year":2023,"publication_date":"2023-10-18","ids":{"openalex":"https://openalex.org/W4387740953","doi":"https://doi.org/10.1186/s13321-023-00752-6","pmid":"https://pubmed.ncbi.nlm.nih.gov/37853492"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-023-00752-6","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00752-6","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310320256"],"host_organization_lineage_names":["Springer Nature","BioMed Central"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00752-6","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060684772","display_name":"Arash Tayyebi","orcid":"https://orcid.org/0000-0002-2183-8422"},"institutions":[{"id":"https://openalex.org/I24571045","display_name":"University of North Dakota","ror":"https://ror.org/04a5szx83","country_code":"US","type":"funder","lineage":["https://openalex.org/I24571045"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arash Tayyebi","raw_affiliation_strings":["University of North Dakota, Chemical Engineering, Grand Forks, ND, 58201, USA"],"affiliations":[{"raw_affiliation_string":"University of North Dakota, Chemical Engineering, Grand Forks, ND, 58201, USA","institution_ids":["https://openalex.org/I24571045"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068984479","display_name":"Ali Alshami","orcid":"https://orcid.org/0000-0003-3266-6870"},"institutions":[{"id":"https://openalex.org/I24571045","display_name":"University of North Dakota","ror":"https://ror.org/04a5szx83","country_code":"US","type":"funder","lineage":["https://openalex.org/I24571045"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ali S Alshami","raw_affiliation_strings":["University of North Dakota, Chemical Engineering, Grand Forks, ND, 58201, USA"],"affiliations":[{"raw_affiliation_string":"University of North Dakota, Chemical Engineering, Grand Forks, ND, 58201, USA","institution_ids":["https://openalex.org/I24571045"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053780818","display_name":"Zeinab Rabiei","orcid":null},"institutions":[{"id":"https://openalex.org/I24571045","display_name":"University of North Dakota","ror":"https://ror.org/04a5szx83","country_code":"US","type":"funder","lineage":["https://openalex.org/I24571045"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zeinab Rabiei","raw_affiliation_strings":["Chemistry Department, University of North Dakota, Grand Forks, ND, 58202, USA"],"affiliations":[{"raw_affiliation_string":"Chemistry Department, University of North Dakota, Grand Forks, ND, 58202, USA","institution_ids":["https://openalex.org/I24571045"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073009073","display_name":"Xue Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I24571045","display_name":"University of North Dakota","ror":"https://ror.org/04a5szx83","country_code":"US","type":"funder","lineage":["https://openalex.org/I24571045"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xue Yu","raw_affiliation_strings":["Energy & Environmental Research Center, University of North Dakota, Grand Forks, ND, 58202, USA"],"affiliations":[{"raw_affiliation_string":"Energy & Environmental Research Center, University of North Dakota, Grand Forks, ND, 58202, USA","institution_ids":["https://openalex.org/I24571045"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022096771","display_name":"Nadhem Ismail","orcid":null},"institutions":[{"id":"https://openalex.org/I24571045","display_name":"University of North Dakota","ror":"https://ror.org/04a5szx83","country_code":"US","type":"funder","lineage":["https://openalex.org/I24571045"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nadhem Ismail","raw_affiliation_strings":["University of North Dakota, Chemical Engineering, Grand Forks, ND, 58201, USA"],"affiliations":[{"raw_affiliation_string":"University of North Dakota, Chemical Engineering, Grand Forks, ND, 58201, USA","institution_ids":["https://openalex.org/I24571045"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024174502","display_name":"Musabbir Jahan Talukder","orcid":null},"institutions":[{"id":"https://openalex.org/I24571045","display_name":"University of North Dakota","ror":"https://ror.org/04a5szx83","country_code":"US","type":"funder","lineage":["https://openalex.org/I24571045"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Musabbir Jahan Talukder","raw_affiliation_strings":["University of North Dakota, Chemical Engineering, Grand Forks, ND, 58201, USA"],"affiliations":[{"raw_affiliation_string":"University of North Dakota, Chemical Engineering, Grand Forks, ND, 58201, USA","institution_ids":["https://openalex.org/I24571045"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086366003","display_name":"Jason Power","orcid":"https://orcid.org/0000-0002-9082-7380"},"institutions":[{"id":"https://openalex.org/I24571045","display_name":"University of North Dakota","ror":"https://ror.org/04a5szx83","country_code":"US","type":"funder","lineage":["https://openalex.org/I24571045"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jason Power","raw_affiliation_strings":["University of North Dakota, Biomedical Sciences, Grand Forks, ND, 58202, USA"],"affiliations":[{"raw_affiliation_string":"University of North Dakota, Biomedical Sciences, Grand Forks, ND, 58202, USA","institution_ids":["https://openalex.org/I24571045"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1290,"currency":"GBP","value_usd":1582},"apc_paid":{"value":1290,"currency":"GBP","value_usd":1582},"fwci":7.482,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":25,"citation_normalized_percentile":{"value":0.784749,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"15","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.998,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10836","display_name":"Metabolomics and Mass Spectrometry Studies","score":0.9886,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/molecular-descriptor","display_name":"Molecular descriptor","score":0.581903},{"id":"https://openalex.org/keywords/predictive-modelling","display_name":"Predictive modelling","score":0.4460003},{"id":"https://openalex.org/keywords/experimental-data","display_name":"Experimental data","score":0.43017286},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4227946}],"concepts":[{"id":"https://openalex.org/C155574463","wikidata":"https://www.wikidata.org/wiki/Q170731","display_name":"Solubility","level":2,"score":0.67967224},{"id":"https://openalex.org/C2777826928","wikidata":"https://www.wikidata.org/wiki/Q3745713","display_name":"Fingerprint (computing)","level":2,"score":0.60664934},{"id":"https://openalex.org/C139945424","wikidata":"https://www.wikidata.org/wiki/Q1940696","display_name":"Mean squared error","level":2,"score":0.5847427},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.58230966},{"id":"https://openalex.org/C164923092","wikidata":"https://www.wikidata.org/wiki/Q3705921","display_name":"Molecular descriptor","level":3,"score":0.581903},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5543268},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.4868433},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.4460003},{"id":"https://openalex.org/C186060115","wikidata":"https://www.wikidata.org/wiki/Q30336093","display_name":"Biological system","level":1,"score":0.43727267},{"id":"https://openalex.org/C55037315","wikidata":"https://www.wikidata.org/wiki/Q5421151","display_name":"Experimental data","level":2,"score":0.43017286},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42571902},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4227946},{"id":"https://openalex.org/C128990827","wikidata":"https://www.wikidata.org/wiki/Q192830","display_name":"Coefficient of determination","level":2,"score":0.4185797},{"id":"https://openalex.org/C164126121","wikidata":"https://www.wikidata.org/wiki/Q766383","display_name":"Quantitative structure\u2013activity relationship","level":2,"score":0.40846527},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4084551},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38757625},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.29809397},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2727757},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.22548807},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-023-00752-6","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00752-6","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310320256"],"host_organization_lineage_names":["Springer Nature","BioMed Central"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10583449","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://doi.org/10.21203/rs.3.rs-2155283/v1","pdf_url":"https://www.researchsquare.com/article/rs-2155283/latest.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/37853492","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-023-00752-6","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00752-6","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310320256"],"host_organization_lineage_names":["Springer Nature","BioMed Central"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Clean water and sanitation","id":"https://metadata.un.org/sdg/6","score":0.56}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":45,"referenced_works":["https://openalex.org/W1971892118","https://openalex.org/W1975308375","https://openalex.org/W1988037271","https://openalex.org/W1992428162","https://openalex.org/W1993608091","https://openalex.org/W1995892465","https://openalex.org/W1996839676","https://openalex.org/W2013894207","https://openalex.org/W2019319532","https://openalex.org/W2022806304","https://openalex.org/W2036291177","https://openalex.org/W2042107474","https://openalex.org/W2060765227","https://openalex.org/W2061481072","https://openalex.org/W2063011021","https://openalex.org/W2076498053","https://openalex.org/W2081092801","https://openalex.org/W2086932621","https://openalex.org/W2092598092","https://openalex.org/W2109978476","https://openalex.org/W2217436801","https://openalex.org/W2294467502","https://openalex.org/W2313298850","https://openalex.org/W2600261207","https://openalex.org/W2791355014","https://openalex.org/W2793396277","https://openalex.org/W2900506937","https://openalex.org/W2913395807","https://openalex.org/W2953013255","https://openalex.org/W2962862931","https://openalex.org/W2964186122","https://openalex.org/W2985099987","https://openalex.org/W3098530784","https://openalex.org/W3113282615","https://openalex.org/W3116202926","https://openalex.org/W3163933735","https://openalex.org/W3195831412","https://openalex.org/W3209153008","https://openalex.org/W4200371786","https://openalex.org/W4200442859","https://openalex.org/W4254182148","https://openalex.org/W4281757722","https://openalex.org/W4288885939","https://openalex.org/W4294733685","https://openalex.org/W4376279768"],"related_works":["https://openalex.org/W2773631948","https://openalex.org/W2388825802","https://openalex.org/W2356998416","https://openalex.org/W2091028332","https://openalex.org/W2065239992","https://openalex.org/W2060738338","https://openalex.org/W2057830808","https://openalex.org/W2037471351","https://openalex.org/W2017430527","https://openalex.org/W1999067289"],"abstract_inverted_index":{"A":[0],"reliable":[1],"and":[2,19,42,48,81,117,136,138,145,150,161,177,188],"practical":[3],"determination":[4,130],"of":[5,25,87,104,129,134],"a":[6,38,84,197],"chemical":[7,26,49],"species'":[8,65],"solubility":[9,27,66,96,169],"in":[10,28,78,127,219],"water":[11,29,95],"continues":[12],"to":[13,36,61,93,183,201],"be":[14],"examined":[15],"using":[16,30,67,108,122,171],"empirical":[17],"observations":[18],"exhaustive":[20],"experimental":[21],"studies":[22,60],"alone.":[23],"Predictions":[24],"data-driven":[31],"algorithms":[32],"can":[33],"allow":[34],"us":[35],"create":[37],"rationally":[39],"designed,":[40],"efficient,":[41],"cost-effective":[43],"tool":[44],"for":[45,69,147,167,224],"next-generation":[46],"materials":[47],"formulations.":[50],"We":[51,98,155],"present":[52],"results":[53],"from":[54],"two":[55],"machine":[56],"learning":[57],"(ML)":[58],"modeling":[59],"adequately":[62],"predict":[63],"various":[64],"data":[68],"over":[70],"8400":[71],"compounds.":[72],"Molecular-descriptors,":[73],"the":[74,88,105,109,115,119,123,148,157,163,172,185,194,216,225],"most":[75,164],"used":[76],"method":[77,196],"previous":[79],"studies,":[80],"Morgan":[82],"fingerprint,":[83],"circular-based":[85],"hash":[86],"molecules'":[89],"structures,":[90],"were":[91],"applied":[92],"produce":[94],"estimates.":[97],"trained":[99],"all":[100],"models":[101,160],"on":[102],"80%":[103],"total":[106],"datasets":[107],"Random":[110],"Forest":[111],"(RFs)":[112],"technique":[113],"as":[114],"regressor":[116],"tested":[118],"prediction":[120],"performance":[121],"remaining":[124],"20%,":[125],"resulting":[126],"coefficient":[128],"(R2)":[131],"test":[132,142,227],"values":[133,143],"0.88":[135],"0.81":[137],"root-mean-square":[139],"deviation":[140],"(RMSE)":[141],"0.64":[144],"0.80":[146],"descriptors":[149],"circular":[151],"fingerprint":[152,195,217],"methods,":[153],"respectively.":[154],"interpreted":[156],"produced":[158],"ML":[159],"reported":[162],"effective":[165],"features":[166],"aqueous":[168],"measures":[170],"Shapley":[173],"Additive":[174],"exPlanations":[175],"(SHAP)":[176],"thermodynamic":[178,191],"analysis.":[179],"Low":[180],"error,":[181],"ability":[182],"investigate":[184],"molecular-level":[186],"interactions,":[187],"compatibility":[189],"with":[190],"quantities":[192],"made":[193],"distinct":[198],"model":[199,214,218],"compared":[200],"other":[202],"available":[203],"computational":[204],"tools.":[205],"However,":[206],"it":[207],"is":[208],"worth":[209],"emphasizing":[210],"that":[211],"physicochemical":[212],"descriptor":[213],"outperformed":[215],"achieving":[220],"better":[221],"predictive":[222],"accuracy":[223],"given":[226],"set.":[228]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4387740953","counts_by_year":[{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":17}],"updated_date":"2025-04-03T03:33:39.410385","created_date":"2023-10-19"}