{"id":"https://openalex.org/W4387963919","doi":"https://doi.org/10.48550/arxiv.2310.16517","title":"OccuQuest: Mitigating Occupational Bias for Inclusive Large Language Models","display_name":"OccuQuest: Mitigating Occupational Bias for Inclusive Large Language Models","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4387963919","doi":"https://doi.org/10.48550/arxiv.2310.16517"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.16517","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2310.16517","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071925203","display_name":"Mingfeng Xue","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xue, Mingfeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062188134","display_name":"Dayiheng Liu","orcid":"https://orcid.org/0000-0002-8755-8941"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Dayiheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031587199","display_name":"Kexin Yang","orcid":"https://orcid.org/0000-0003-2333-2406"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Kexin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111063144","display_name":"Guanting Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Guanting","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039239180","display_name":"Wenqiang Lei","orcid":"https://orcid.org/0000-0001-6540-0601"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lei, Wenqiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109039519","display_name":"Zheng Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Zheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103055433","display_name":"Chang Zhou","orcid":"https://orcid.org/0000-0002-6701-6619"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Chang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5057864403","display_name":"Jingren Zhou","orcid":"https://orcid.org/0000-0002-4220-2634"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Jingren","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.710701,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":76,"max":82},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9671,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9671,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9597,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9028,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/specialty","display_name":"Specialty","score":0.50825113}],"concepts":[{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.70762753},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.589867},{"id":"https://openalex.org/C20387591","wikidata":"https://www.wikidata.org/wiki/Q930752","display_name":"Specialty","level":2,"score":0.50825113},{"id":"https://openalex.org/C82279013","wikidata":"https://www.wikidata.org/wiki/Q684740","display_name":"Real estate","level":2,"score":0.49529377},{"id":"https://openalex.org/C2780495726","wikidata":"https://www.wikidata.org/wiki/Q2186896","display_name":"Estate","level":2,"score":0.4489191},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.40918598},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.397768},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.1874311},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.13648784},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.1109525},{"id":"https://openalex.org/C118552586","wikidata":"https://www.wikidata.org/wiki/Q7867","display_name":"Psychiatry","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.16517","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.16517","pdf_url":"http://arxiv.org/pdf/2310.16517","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2310.16517","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.16517","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.75,"display_name":"Quality education","id":"https://metadata.un.org/sdg/4"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4229884329","https://openalex.org/W3045293259","https://openalex.org/W3032220911","https://openalex.org/W2386244707","https://openalex.org/W2385612156","https://openalex.org/W2382841014","https://openalex.org/W2365653098","https://openalex.org/W2364829248","https://openalex.org/W2349001406","https://openalex.org/W2000811477"],"abstract_inverted_index":{"The":[0],"emergence":[1],"of":[2,23,98,194],"large":[3],"language":[4,10],"models":[5],"(LLMs)":[6],"has":[7],"revolutionized":[8],"natural":[9],"processing":[11],"tasks.":[12],"However,":[13],"existing":[14],"instruction-tuning":[15,59],"datasets":[16,108],"suffer":[17],"from":[18,43,154],"occupational":[19,77,99,138],"bias:":[20],"the":[21,33,185],"majority":[22],"data":[24],"relates":[25],"to":[26,36,40,93,162],"only":[27],"a":[28,95,118,190],"few":[29],"occupations,":[30],"which":[31,63,165],"hampers":[32],"instruction-tuned":[34],"LLMs":[35],"generate":[37],"helpful":[38],"responses":[39],"professional":[41,176],"queries":[42,84],"practitioners":[44],"in":[45,75,178],"specific":[46],"fields.":[47],"To":[48],"mitigate":[49],"this":[50],"issue":[51],"and":[52,68,91,111,147,173,180],"promote":[53],"occupation-inclusive":[54],"LLMs,":[55],"we":[56,113,125],"create":[57],"an":[58,133,140,148],"dataset":[60],"named":[61],"\\emph{OccuQuest},":[62],"contains":[64],"110,000+":[65],"prompt-completion":[66],"pairs":[67],"30,000+":[69],"dialogues":[70],"covering":[71,136],"over":[72],"1,000":[73],"occupations":[74],"26":[76],"categories.":[78],"We":[79,156],"systematically":[80],"request":[81],"ChatGPT,":[82],"organizing":[83],"hierarchically":[85],"based":[86],"on":[87,144,160,175,184],"Occupation,":[88],"Responsibility,":[89],"Topic,":[90],"Question,":[92],"ensure":[94],"comprehensive":[96,131],"coverage":[97],"specialty":[100],"inquiries.":[101],"By":[102],"comparing":[103],"with":[104],"three":[105,127],"commonly":[106],"used":[107],"(Dolly,":[109],"ShareGPT,":[110],"WizardLM),":[112],"observe":[114],"that":[115],"OccuQuest":[116,161],"exhibits":[117],"more":[119],"balanced":[120],"distribution":[121],"across":[122],"occupations.":[123],"Furthermore,":[124],"assemble":[126],"test":[128],"sets":[129],"for":[130],"evaluation,":[132],"occu-test":[134],"set":[135,142,150],"25":[137],"categories,":[139],"estate":[141],"focusing":[143],"real":[145],"estate,":[146],"occu-quora":[149,186],"containing":[151],"real-world":[152],"questions":[153,177],"Quora.":[155],"then":[157],"fine-tune":[158],"LLaMA":[159,169],"obtain":[163],"OccuLLaMA,":[164],"significantly":[166],"outperforms":[167],"state-of-the-art":[168],"variants":[170],"(Vicuna,":[171],"Tulu,":[172],"WizardLM)":[174],"GPT-4":[179],"human":[181],"evaluations.":[182],"Notably,":[183],"set,":[187],"OccuLLaMA":[188],"reaches":[189],"high":[191],"win":[192],"rate":[193],"86.4\\%":[195],"against":[196],"WizardLM.":[197]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4387963919","counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2025-04-24T17:26:32.821509","created_date":"2023-10-27"}