{"id":"https://openalex.org/W4399795043","doi":"https://doi.org/10.48550/arxiv.2406.11328","title":"Are Large Language Models True Healthcare Jacks-of-All-Trades?\n Benchmarking Across Health Professions Beyond Physician Exams","display_name":"Are Large Language Models True Healthcare Jacks-of-All-Trades?\n Benchmarking Across Health Professions Beyond Physician Exams","publication_year":2024,"publication_date":"2024-06-17","ids":{"openalex":"https://openalex.org/W4399795043","doi":"https://doi.org/10.48550/arxiv.2406.11328"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2406.11328","pdf_url":"https://arxiv.org/pdf/2406.11328","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2406.11328","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075367840","display_name":"Zheheng Luo","orcid":"https://orcid.org/0000-0001-8246-5511"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Zheheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034535852","display_name":"Chenhan Yuan","orcid":"https://orcid.org/0000-0001-9667-0460"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Chenhan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101868563","display_name":"Qianqian Xie","orcid":"https://orcid.org/0000-0002-9588-7454"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Qianqian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5077976343","display_name":"Sophia Ananiadou","orcid":"https://orcid.org/0000-0002-4097-9191"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ananiadou, Sophia","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":83},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.7111,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.7111,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12708","display_name":"Economic and Financial Impacts of Cancer","score":0.6858,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.89309156}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.89309156},{"id":"https://openalex.org/C160735492","wikidata":"https://www.wikidata.org/wiki/Q31207","display_name":"Health care","level":2,"score":0.64700013},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.42225686},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.3384413},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.33358145},{"id":"https://openalex.org/C512399662","wikidata":"https://www.wikidata.org/wiki/Q3505712","display_name":"Family medicine","level":1,"score":0.32240033},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.16730103},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.12094325},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.06128958}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2406.11328","pdf_url":"https://arxiv.org/pdf/2406.11328","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2406.11328","pdf_url":"https://arxiv.org/pdf/2406.11328","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W435179959","https://openalex.org/W4238897586","https://openalex.org/W2748952813","https://openalex.org/W2619091065","https://openalex.org/W2291782699","https://openalex.org/W2284465472","https://openalex.org/W2059640416","https://openalex.org/W1993948687","https://openalex.org/W1490753184"],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,11,27,52,61,189,236],"Large":[3],"Language":[4],"Models":[5],"(LLMs)":[6],"have":[7],"demonstrated":[8],"their":[9,117,187],"potential":[10],"delivering":[12],"accurate":[13],"answers":[14],"to":[15,198,221,229],"questions":[16,69,162],"about":[17],"world":[18],"knowledge.":[19],"Despite":[20],"this,":[21],"existing":[22],"benchmarks":[23,220],"for":[24,49],"evaluating":[25,116],"LLMs":[26,146,235],"healthcare":[28,37,58,75,227,238],"predominantly":[29],"focus":[30],"on":[31,102,161,181,206],"medical":[32,113],"doctors,":[33],"leaving":[34],"other":[35],"critical":[36],"professions":[38,228],"underrepresented.":[39],"To":[40],"fill":[41],"this":[42],"research":[43],"gap,":[44],"we":[45],"introduce":[46],"the":[47,159,165,178,182,216,232],"Examinations":[48],"Medical":[50],"Personnel":[51],"Chinese":[53,200],"(EMPEC),":[54],"a":[55,203,223],"pioneering":[56],"large-scale":[57],"knowledge":[59],"benchmark":[60],"traditional":[62,197],"Chinese.":[63],"EMPEC":[64],"consists":[65],"of":[66,218,226,234],"157,803":[67],"exam":[68],"across":[70],"124":[71],"subjects":[72],"and":[73,82,92,96,112,141,150],"20":[74],"professions,":[76],"including":[77,105],"underrepresented":[78],"occupations":[79],"like":[80,129],"Optometrists":[81],"Audiologists.":[83],"Each":[84],"question":[85],"is":[86],"tagged":[87],"with":[88,138,172],"its":[89],"release":[90],"time":[91],"source,":[93],"ensuring":[94],"relevance":[95],"authenticity.":[97],"We":[98],"conducted":[99],"extensive":[100],"experiments":[101],"17":[103],"LLMs,":[104],"proprietary,":[106],"open-source":[107],"models,":[108,115,149],"general":[109],"domain":[110],"models":[111,128],"specific":[114],"performance":[118,174,180],"under":[119],"various":[120],"settings.":[121],"Our":[122,213],"findings":[123],"reveal":[124],"that":[125,177],"while":[126],"leading":[127],"GPT-4":[130],"achieve":[131],"over":[132],"75\\%":[133],"accuracy,":[134],"they":[135],"still":[136],"struggle":[137],"specialized":[139],"fields":[140],"alternative":[142],"medicine.":[143],"Surprisingly,":[144],"general-purpose":[145],"outperformed":[147],"medical-specific":[148],"incorporating":[151],"EMPEC's":[152],"training":[153,167],"data":[154],"significantly":[155],"enhanced":[156],"performance.":[157],"Additionally,":[158],"results":[160],"released":[163],"after":[164],"models'":[166,179],"cutoff":[168],"date":[169],"were":[170],"consistent":[171],"overall":[173],"trends,":[175],"suggesting":[176],"test":[183],"set":[184],"can":[185],"predict":[186],"effectiveness":[188],"addressing":[190],"unseen":[191],"healthcare-related":[192],"queries.":[193],"The":[194],"transition":[195],"from":[196],"simplified":[199],"characters":[201],"had":[202],"negligible":[204],"impact":[205],"model":[207],"performance,":[208],"indicating":[209],"robust":[210],"linguistic":[211],"versatility.":[212],"study":[214],"underscores":[215],"importance":[217],"expanding":[219],"cover":[222],"broader":[224],"range":[225],"better":[230],"assess":[231],"applicability":[233],"real-world":[237],"scenarios.":[239]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4399795043","counts_by_year":[],"updated_date":"2025-01-09T08:40:58.928782","created_date":"2024-06-19"}