{"id":"https://openalex.org/W4392866649","doi":"https://doi.org/10.48550/arxiv.2403.08309","title":"HRLAIF: Improvements in Helpfulness and Harmlessness in Open-domain\n Reinforcement Learning From AI Feedback","display_name":"HRLAIF: Improvements in Helpfulness and Harmlessness in Open-domain\n Reinforcement Learning From AI Feedback","publication_year":2024,"publication_date":"2024-03-13","ids":{"openalex":"https://openalex.org/W4392866649","doi":"https://doi.org/10.48550/arxiv.2403.08309"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2403.08309","pdf_url":"https://arxiv.org/pdf/2403.08309","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2403.08309","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100413669","display_name":"Ang Li","orcid":"https://orcid.org/0009-0008-2561-8348"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Ang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002604576","display_name":"Qiugen Xiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Qiugen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051067303","display_name":"Peng Cao","orcid":"https://orcid.org/0000-0001-6390-6852"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Peng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067590443","display_name":"Jian Tang","orcid":"https://orcid.org/0000-0003-1324-2466"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Jian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113144150","display_name":"Yi Ping Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Yi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101896168","display_name":"Zijie Zhao","orcid":"https://orcid.org/0000-0001-9676-3181"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Zijie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100373745","display_name":"Xiaohong Chen","orcid":"https://orcid.org/0000-0002-9797-8384"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Xiaoyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100425225","display_name":"Liang Zhang","orcid":"https://orcid.org/0000-0002-5805-7099"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Liang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100341802","display_name":"Xiang\u2010Yang Li","orcid":"https://orcid.org/0000-0002-6070-6625"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xiangyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110704386","display_name":"Kaitong Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Kaitong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018635864","display_name":"Weidong Guo","orcid":"https://orcid.org/0000-0002-3952-3541"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Weidong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113144149","display_name":"Yukang Gan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gan, Yukang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101249021","display_name":"Xu Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031411400","display_name":"Daniell Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Daniell","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5102004349","display_name":"Ying Shan","orcid":"https://orcid.org/0000-0001-7673-8325"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shan, Ying","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.999954,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":84,"max":92},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8248,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8248,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/helpfulness","display_name":"Helpfulness","score":0.95145094}],"concepts":[{"id":"https://openalex.org/C2781265381","wikidata":"https://www.wikidata.org/wiki/Q5710255","display_name":"Helpfulness","level":2,"score":0.95145094},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7084027},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5220832},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.51147354},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.48531622},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.33105826},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2531126},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.15128145},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09411016},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2403.08309","pdf_url":"https://arxiv.org/pdf/2403.08309","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2403.08309","pdf_url":"https://arxiv.org/pdf/2403.08309","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4285360723","https://openalex.org/W4281847990","https://openalex.org/W3037056935","https://openalex.org/W2934621214","https://openalex.org/W2748952813","https://openalex.org/W2613921548","https://openalex.org/W2611407113","https://openalex.org/W2488228222","https://openalex.org/W2002563848","https://openalex.org/W1498449133"],"abstract_inverted_index":{"Reinforcement":[0,17,109,181],"Learning":[1,18,110,182],"from":[2,19,111],"AI":[3,112,121,137],"Feedback":[4,21,113],"(RLAIF)":[5],"has":[6],"the":[7,28,76,118,126,143,153,170,177,195],"advantages":[8],"of":[9,33,93,100,120,155,173,188,197,200],"shorter":[10],"annotation":[11],"cycles":[12],"and":[13,95],"lower":[14],"costs":[15],"over":[16],"Human":[20,146],"(RLHF),":[22],"making":[23,125],"it":[24,135,184],"highly":[25],"efficient":[26],"during":[27],"rapid":[29],"strategy":[30],"iteration":[31],"periods":[32],"large":[34],"language":[35],"model":[36,64,179],"(LLM)":[37],"training.":[38],"Using":[39],"ChatGPT":[40],"as":[41],"a":[42,67,164,198],"labeler":[43],"to":[44,84,157,176],"provide":[45],"feedback":[46],"on":[47],"open-domain":[48],"prompts":[49],"in":[50,57,69,78,91,131,190,202],"RLAIF":[51,156],"training,":[52],"we":[53,106],"observe":[54],"an":[55,186],"increase":[56,187],"human":[58,159],"evaluators'":[59,70],"preference":[60,160],"win":[61],"ratio":[62],"for":[63,123,138,161],"responses,":[65,124],"but":[66],"decrease":[68,77,199],"satisfaction":[71,79,171,191,203],"rate.":[72],"Analysis":[73],"suggests":[74],"that":[75,150],"rate":[80,172,204],"is":[81],"mainly":[82],"due":[83],"some":[85],"responses":[86],"becoming":[87],"less":[88],"helpful,":[89],"particularly":[90],"terms":[92],"correctness":[94],"truthfulness,":[96],"highlighting":[97],"practical":[98],"limitations":[99],"basic":[101,206],"RLAIF.":[102,207],"In":[103],"this":[104],"paper,":[105],"propose":[107],"Hybrid":[108],"(HRLAIF).":[114],"This":[115],"method":[116],"enhances":[117],"accuracy":[119],"annotations":[122],"model's":[127,144],"helpfulness":[128],"more":[129],"robust":[130],"training":[132],"process.":[133],"Additionally,":[134],"employs":[136],"Red":[139],"Teaming,":[140],"further":[141],"improving":[142,169],"harmlessness.":[145],"evaluation":[147],"results":[148],"show":[149],"HRLAIF":[151],"inherits":[152],"ability":[154],"enhance":[158],"outcomes":[162],"at":[163],"low":[165],"cost":[166],"while":[167],"also":[168],"responses.":[174],"Compared":[175],"policy":[178],"before":[180],"(RL),":[183],"achieves":[185],"2.08\\%":[189],"rate,":[192],"effectively":[193],"addressing":[194],"issue":[196],"4.58\\%":[201],"after":[205]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4392866649","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-01-04T08:57:26.558538","created_date":"2024-03-16"}