{"id":"https://openalex.org/W2114656094","doi":"https://doi.org/10.1186/1758-2946-2-s1-p7","title":"Comparing manual and automated extraction of chemical entities from documents","display_name":"Comparing manual and automated extraction of chemical entities from documents","publication_year":2010,"publication_date":"2010-05-01","ids":{"openalex":"https://openalex.org/W2114656094","doi":"https://doi.org/10.1186/1758-2946-2-s1-p7","mag":"2114656094","pmcid":"https://www.ncbi.nlm.nih.gov/pmc/articles/2867192"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1186/1758-2946-2-s1-p7","pdf_url":"https://jcheminf.biomedcentral.com/track/pdf/10.1186/1758-2946-2-S1-P7","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310320256"],"host_organization_lineage_names":["Springer Nature","BioMed Central"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://jcheminf.biomedcentral.com/track/pdf/10.1186/1758-2946-2-S1-P7","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051401603","display_name":"Christian Tyrchan","orcid":"https://orcid.org/0000-0002-6470-984X"},"institutions":[{"id":"https://openalex.org/I105036370","display_name":"AstraZeneca (United Kingdom)","ror":"https://ror.org/04r9x1a08","country_code":"GB","type":"funder","lineage":["https://openalex.org/I105036370"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Christian Tyrchan","raw_affiliation_strings":["AstraZeneca R&D, LG CVGI, M\u00f6lndal, Sweden"],"affiliations":[{"raw_affiliation_string":"AstraZeneca R&D, LG CVGI, M\u00f6lndal, Sweden","institution_ids":["https://openalex.org/I105036370"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056365720","display_name":"Sorel Mure\u015fan","orcid":"https://orcid.org/0000-0003-2386-0436"},"institutions":[{"id":"https://openalex.org/I105036370","display_name":"AstraZeneca (United Kingdom)","ror":"https://ror.org/04r9x1a08","country_code":"GB","type":"funder","lineage":["https://openalex.org/I105036370"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Sorel Muresan","raw_affiliation_strings":["AstraZeneca R&D, LG CVGI, M\u00f6lndal, Sweden"],"affiliations":[{"raw_affiliation_string":"AstraZeneca R&D, LG CVGI, M\u00f6lndal, Sweden","institution_ids":["https://openalex.org/I105036370"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1290,"currency":"GBP","value_usd":1582},"apc_paid":{"value":1290,"currency":"GBP","value_usd":1582},"fwci":0.133,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":2,"citation_normalized_percentile":{"value":0.465445,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":71,"max":75},"biblio":{"volume":"2","issue":"S1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9958,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9958,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9896,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9712,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.70885813},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.48041797},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.46571493},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.4105525},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3841901},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.17646903},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.15040159}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1186/1758-2946-2-s1-p7","pdf_url":"https://jcheminf.biomedcentral.com/track/pdf/10.1186/1758-2946-2-S1-P7","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310320256"],"host_organization_lineage_names":["Springer Nature","BioMed Central"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2867192","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1186/1758-2946-2-s1-p7","pdf_url":"https://jcheminf.biomedcentral.com/track/pdf/10.1186/1758-2946-2-S1-P7","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310320256"],"host_organization_lineage_names":["Springer Nature","BioMed Central"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":3,"referenced_works":["https://openalex.org/W2220239985","https://openalex.org/W2950798877","https://openalex.org/W636174227"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2530322880","https://openalex.org/W2390279801","https://openalex.org/W2382290278","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2350741829","https://openalex.org/W2130043461","https://openalex.org/W2001405890"],"abstract_inverted_index":{"The":[0,78,222],"chemical":[1,28,47,72,79,93,101,168,241],"information":[2,80,242],"landscape":[3],"is":[4,36,52,164],"changing":[5],"rapidly":[6],"with":[7,138],"a":[8,37,53,130,157,165,197,209],"yearly":[9],"increase":[10],"of":[11,100,112,199,211,237],"over":[12],"1":[13],"million":[14,192],"new":[15],"compounds":[16],"and":[17,34,56,81,94,105,110,125,144,154,178,185,206,235],"more":[18,189],"than":[19,190],"700,000":[20],"publications":[21],"related":[22],"to":[23,106],"chemistry":[24,44,119],"[1].":[25],"Exploring":[26],"the":[27,98,108,113,118,233],"space":[29],"covered":[30],"by":[31,65,71,121],"relevant":[32],"journals":[33,153],"patents":[35,155,201,215],"crucial":[38],"step":[39],"in":[40,87,103],"early":[41],"stage":[42],"medicinal":[43],"projects.":[45],"Extracting":[46],"entities":[48],"from":[49,129,152,218,244],"unstructured":[50,245],"text":[51,68,95,126,173,238],"complex":[54,92],"task":[55],"different":[57],"approaches":[58],"are":[59,84,136],"currently":[60],"used":[61],"including":[62],"manual":[63,122],"extraction":[64,243],"expert":[66],"curators,":[67],"mining":[69,127,239],"supported":[70],"NER":[73,102],"or":[74],"combinations":[75],"thereof":[76],"[2].":[77],"corresponding":[82],"annotations":[83],"subsequently":[85],"stored":[86],"relational":[88],"databases":[89,135],"allowing":[90],"for":[91,167,175,207,240],"queries.\r\n\r\nTo":[96],"assess":[97],"capability":[99],"documents":[104],"understand":[107],"coverage":[109],"accuracy":[111],"underlying":[114],"data":[115],"we":[116],"compared":[117],"extracted":[120,151],"curation":[123],"(GVKBIO)":[124],"(SureChem)":[128],"small":[131],"patent":[132,169],"corpus.\r\n\r\n\u2022":[133],"GVKBIO":[134,219],"populated":[137],"explicit":[139],"relationships":[140],"between":[141],"compounds,":[142],"assays":[143],"sequence":[145],"identifiers":[146],"that":[147],"have":[148,195],"been":[149],"manually":[150],"on":[156,171,188],"large":[158],"scale":[159],"[3].\r\n\r\n\u2022":[160],"SureChem":[161,180],"Portal":[162],"[4]":[163],"gateway":[166],"search":[170],"full":[172],"collections":[174],"USPTO,":[176],"EPO":[177],"WO.":[179],"users":[181],"can":[182],"perform":[183],"structure":[184],"keyword":[186],"searches":[187],"9":[191],"unique":[193],"compounds.\r\n\r\nWe":[194],"selected":[196],"set":[198],"250":[200],"covering":[202],"various":[203],"target":[204],"classes":[205],"which":[208],"minimum":[210],"25":[212],"records":[213],"per":[214],"were":[216],"retrieved":[217],"Patent":[220],"database.":[221],"analysis":[223],"was":[224],"done":[225],"using":[226],"PipelinePilot":[227],"protocols":[228],"[5].\r\n\r\nThese":[229],"initial":[230],"results":[231],"demonstrate":[232],"benefits":[234],"challenges":[236],"text.":[246]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2114656094","counts_by_year":[{"year":2016,"cited_by_count":1},{"year":2013,"cited_by_count":1}],"updated_date":"2025-03-16T16:41:28.603519","created_date":"2016-06-24"}