{"id":"https://openalex.org/W4283647301","doi":"https://doi.org/10.48550/arxiv.2206.12088","title":"Classifying Unstructured Clinical Notes via Automatic Weak Supervision","display_name":"Classifying Unstructured Clinical Notes via Automatic Weak Supervision","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4283647301","doi":"https://doi.org/10.48550/arxiv.2206.12088"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2206.12088","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2206.12088","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104294452","display_name":"Chufan Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Chufan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032798300","display_name":"Mononito Goswami","orcid":"https://orcid.org/0000-0002-4117-5558"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goswami, Mononito","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101757967","display_name":"Jieshi Chen","orcid":"https://orcid.org/0000-0003-2660-4913"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jieshi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5037154494","display_name":"Artur Dubrawski","orcid":"https://orcid.org/0000-0002-2372-0831"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dubrawski, Artur","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":60},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.983,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.983,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9777,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9762,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/medical-classification","display_name":"Medical classification","score":0.43277565},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4324367},{"id":"https://openalex.org/keywords/unstructured-data","display_name":"Unstructured data","score":0.41159374},{"id":"https://openalex.org/keywords/diagnosis-code","display_name":"Diagnosis code","score":0.41062704}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.77363884},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.69035465},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.59292537},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.58430964},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.5576882},{"id":"https://openalex.org/C534262118","wikidata":"https://www.wikidata.org/wiki/Q177719","display_name":"Medical diagnosis","level":2,"score":0.5559956},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4884104},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.44535452},{"id":"https://openalex.org/C154874363","wikidata":"https://www.wikidata.org/wiki/Q3518464","display_name":"Medical classification","level":2,"score":0.43277565},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4324367},{"id":"https://openalex.org/C2781252014","wikidata":"https://www.wikidata.org/wiki/Q1141900","display_name":"Unstructured data","level":3,"score":0.41159374},{"id":"https://openalex.org/C45827449","wikidata":"https://www.wikidata.org/wiki/Q5270338","display_name":"Diagnosis code","level":3,"score":0.41062704},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.29385963},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.1540631},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10660279},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C159110408","wikidata":"https://www.wikidata.org/wiki/Q121176","display_name":"Nursing","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C142724271","wikidata":"https://www.wikidata.org/wiki/Q7208","display_name":"Pathology","level":1,"score":0.0},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2206.12088","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2206.12088","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2206.12088","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W72816711","https://openalex.org/W57843728","https://openalex.org/W4292539280","https://openalex.org/W2802989430","https://openalex.org/W2791484311","https://openalex.org/W2419506399","https://openalex.org/W2406037887","https://openalex.org/W2333798660","https://openalex.org/W2158076149","https://openalex.org/W2073015584"],"abstract_inverted_index":{"Healthcare":[0],"providers":[1,28],"usually":[2],"record":[3],"detailed":[4],"notes":[5,187],"of":[6,25,43,65,81,162],"the":[7,22,40,87,126,135,145,158,189],"clinical":[8],"care":[9],"delivered":[10],"to":[11,21,32,36,85,103,128,149,153,168,181,185],"each":[12],"patient":[13],"for":[14],"clinical,":[15],"research,":[16],"and":[17,58,144,160],"billing":[18],"purposes.":[19],"Due":[20],"unstructured":[23],"nature":[24],"these":[26],"narratives,":[27],"employ":[29],"dedicated":[30],"staff":[31],"assign":[33,150],"diagnostic":[34,90],"codes":[35,184],"patients'":[37],"diagnoses":[38],"using":[39],"International":[41],"Classification":[42],"Diseases":[44],"(ICD)":[45],"coding":[46,91],"system.":[47],"This":[48],"manual":[49],"process":[50],"is":[51],"not":[52],"only":[53],"time-consuming":[54],"but":[55,74],"also":[56],"costly":[57],"error-prone.":[59],"Prior":[60],"work":[61],"demonstrated":[62],"potential":[63],"utility":[64],"Machine":[66],"Learning":[67],"(ML)":[68],"methodology":[69],"in":[70,179,188],"automating":[71],"this":[72,109],"process,":[73],"it":[75,167],"has":[76],"relied":[77],"on":[78],"large":[79],"quantities":[80],"manually":[82],"labeled":[83],"data":[84,146],"train":[86],"models.":[88],"Additionally,":[89],"systems":[92],"evolve":[93],"with":[94],"time,":[95],"which":[96],"makes":[97],"traditional":[98],"supervised":[99],"learning":[100],"strategies":[101],"unable":[102],"generalize":[104],"beyond":[105],"local":[106],"applications.":[107],"In":[108],"work,":[110],"we":[111],"introduce":[112],"a":[113],"general":[114],"weakly-supervised":[115],"text":[116,171,176],"classification":[117,177],"framework":[118,148],"that":[119],"learns":[120],"from":[121],"class-label":[122],"descriptions":[123],"only,":[124],"without":[125],"need":[127],"use":[129],"any":[130],"human-labeled":[131],"documents.":[132],"It":[133],"leverages":[134],"linguistic":[136],"domain":[137],"knowledge":[138],"stored":[139],"within":[140],"pre-trained":[141],"language":[142],"models":[143],"programming":[147],"code":[151],"labels":[152],"individual":[154],"texts.":[155],"We":[156],"demonstrate":[157],"efficacy":[159],"flexibility":[161],"our":[163],"method":[164],"by":[165],"comparing":[166],"state-of-the-art":[169],"weak":[170],"classifiers":[172],"across":[173],"four":[174],"real-world":[175],"datasets,":[178],"addition":[180],"assigning":[182],"ICD":[183],"medical":[186],"publicly":[190],"available":[191],"MIMIC-III":[192],"database.":[193]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4283647301","counts_by_year":[],"updated_date":"2024-12-12T22:25:13.980773","created_date":"2022-06-29"}