{"id":"https://openalex.org/W4387322626","doi":"https://doi.org/10.48550/arxiv.2310.01045","title":"Tool-Augmented Reward Modeling","display_name":"Tool-Augmented Reward Modeling","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4387322626","doi":"https://doi.org/10.48550/arxiv.2310.01045"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.01045","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2310.01045","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100440277","display_name":"Lei Li","orcid":"https://orcid.org/0000-0002-0688-9619"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Lei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072082849","display_name":"Yekun Chai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chai, Yekun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058880150","display_name":"Shuohuan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Shuohuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100567346","display_name":"Yu Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Yu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110696480","display_name":"Hao Tian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Hao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030810655","display_name":"Ningyu Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Ningyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5111717250","display_name":"Hua Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Hua","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.765593,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":78,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10703","display_name":"Business Process Modeling and Analysis","score":0.1913,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10703","display_name":"Business Process Modeling and Analysis","score":0.1913,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10910","display_name":"Corporate Governance and Management","score":0.1851,"subfield":{"id":"https://openalex.org/subfields/1408","display_name":"Strategy and Management"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.44072855},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.36075008}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.01045","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.01045","pdf_url":"http://arxiv.org/pdf/2310.01045","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2310.01045","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.01045","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391913857","https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2478288626","https://openalex.org/W2390279801","https://openalex.org/W2382290278","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2350741829","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Reward":[0],"modeling":[1,58],"(a.k.a.,":[2],"preference":[3,57,159],"modeling)":[4],"is":[5],"instrumental":[6],"for":[7],"aligning":[8],"large":[9],"language":[10],"models":[11,29],"with":[12,38,69,114,180],"human":[13,23,176],"preferences,":[14],"particularly":[15],"within":[16],"the":[17,103,220],"context":[18],"of":[19,105,138,153,187,203],"reinforcement":[20],"learning":[21],"from":[22,209],"feedback":[24],"(RLHF).":[25],"While":[26],"conventional":[27],"reward":[28,89],"(RMs)":[30],"have":[31,218],"exhibited":[32],"remarkable":[33],"scalability,":[34],"they":[35],"oft":[36],"struggle":[37],"fundamental":[39],"functionality":[40],"such":[41],"as":[42],"arithmetic":[43],"computation,":[44],"code":[45],"execution,":[46],"and":[47,76,88,96,118,123,223,230],"factual":[48],"lookup.":[49],"In":[50,175],"this":[51],"paper,":[52],"we":[53,198],"propose":[54],"a":[55,135,149,200],"tool-augmented":[56],"approach,":[59],"named":[60],"Themis,":[61],"to":[62,71,112,191,228],"address":[63],"these":[64],"limitations":[65],"by":[66,167],"empowering":[67],"RMs":[68],"access":[70],"external":[72,106,116,143],"environments,":[73],"including":[74],"calculators":[75],"search":[77],"engines.":[78],"This":[79],"approach":[80,133,163],"not":[81],"only":[82],"fosters":[83],"synergy":[84],"between":[85],"tool":[86,121,212],"utilization":[87],"grading":[90],"but":[91],"also":[92],"enhances":[93],"interpretive":[94],"capacity":[95],"scoring":[97],"reliability.":[98],"Our":[99,145],"study":[100],"delves":[101],"into":[102,108],"integration":[104],"tools":[107],"RMs,":[109],"enabling":[110],"them":[111],"interact":[113],"diverse":[115],"sources":[117],"construct":[119],"task-specific":[120],"engagement":[122],"reasoning":[124],"traces":[125],"in":[126,158,172],"an":[127,183],"autoregressive":[128],"manner.":[129],"We":[130,217],"validate":[131],"our":[132,162],"across":[134,155,193],"wide":[136],"range":[137],"domains,":[139],"incorporating":[140,207],"seven":[141,210],"distinct":[142,195,211],"tools.":[144],"experimental":[146],"results":[147],"demonstrate":[148],"noteworthy":[150],"overall":[151],"improvement":[152],"17.7%":[154],"eight":[156],"tasks":[157],"ranking.":[160],"Furthermore,":[161],"outperforms":[164],"Gopher":[165],"280B":[166],"7.3%":[168],"on":[169],"TruthfulQA":[170],"task":[171],"zero-shot":[173],"evaluation.":[174],"evaluations,":[177],"RLHF":[178],"trained":[179],"Themis":[181],"attains":[182],"average":[184],"win":[185],"rate":[186],"32%":[188],"when":[189],"compared":[190],"baselines":[192],"four":[194],"tasks.":[196],"Additionally,":[197],"provide":[199],"comprehensive":[201],"collection":[202],"tool-related":[204],"RM":[205],"datasets,":[206],"data":[208],"APIs,":[213],"totaling":[214],"15,000":[215],"instances.":[216],"made":[219],"code,":[221],"data,":[222],"model":[224],"checkpoints":[225],"publicly":[226],"available":[227],"facilitate":[229],"inspire":[231],"further":[232],"research":[233],"advancements\\footnote{\\url{https://github.com/ernie-research/Tool-Augmented-Reward-Model}}.":[234]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4387322626","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-01-21T05:38:17.126713","created_date":"2023-10-04"}