{"id":"https://openalex.org/W4386346085","doi":"https://doi.org/10.48550/arxiv.2308.15470","title":"Policy composition in reinforcement learning via multi-objective policy optimization","display_name":"Policy composition in reinforcement learning via multi-objective policy optimization","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4386346085","doi":"https://doi.org/10.48550/arxiv.2308.15470"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2308.15470","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2308.15470","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060243836","display_name":"Debahuti Mishra","orcid":"https://orcid.org/0000-0002-9847-1411"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mishra, Shruti","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002469541","display_name":"Ankit Anand","orcid":"https://orcid.org/0000-0002-8832-3212"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anand, Ankit","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063020928","display_name":"Jordan Hoffmann","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hoffmann, Jordan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062951341","display_name":"Nicolas Heess","orcid":"https://orcid.org/0000-0001-7876-9256"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heess, Nicolas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041323275","display_name":"Martin Riedmiller","orcid":"https://orcid.org/0000-0002-8465-5690"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Riedmiller, Martin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007133617","display_name":"Abbas Abdolmaleki","orcid":"https://orcid.org/0000-0001-6692-5856"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abdolmaleki, Abbas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5065836447","display_name":"Doina Precup","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Precup, Doina","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":67},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8396,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8396,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.5909966},{"id":"https://openalex.org/keywords/policy-learning","display_name":"Policy learning","score":0.530613}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8042606},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.72480714},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.61203545},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.5909966},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.530613},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.51283365},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.45137516},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.43760312},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.34102955},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31980932},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25347954},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.21193355},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.1247789},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.114822805},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2308.15470","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2308.15470","pdf_url":"http://arxiv.org/pdf/2308.15470","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2308.15470","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2308.15470","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.46,"display_name":"Quality education","id":"https://metadata.un.org/sdg/4"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4390421286","https://openalex.org/W4389724018","https://openalex.org/W4318719684","https://openalex.org/W4318559728","https://openalex.org/W4281847915","https://openalex.org/W4280563792","https://openalex.org/W3183136280","https://openalex.org/W2911962197","https://openalex.org/W2775233965","https://openalex.org/W2140186469"],"abstract_inverted_index":{"We":[0,213],"enable":[1],"reinforcement":[2],"learning":[3,148],"agents":[4,75,128,171,184],"to":[5,25,89,99,116,126,131,133,175,187,194,209],"learn":[6],"successful":[7],"behavior":[8],"policies":[9,18,52,79,93,135,193,220,225],"by":[10,138],"utilizing":[11],"relevant":[12],"pre-existing":[13],"teacher":[14,17,51,78,134,192,211,224],"policies.":[15,212],"The":[16,124],"are":[19,86,129,136,185],"introduced":[20],"as":[21],"objectives,":[22],"in":[23,29,59,80,83,97,205],"addition":[24],"the":[26,36,60,92,95,101,105,118,143,151,155,158,161,173,177,191,201,210,215,222],"task":[27,109,198,203,219],"objective,":[28],"a":[30,39,196],"multi-objective":[31],"policy":[32],"optimization":[33],"setting.":[34],"Using":[35],"Multi-Objective":[37],"Maximum":[38],"Posteriori":[40],"Policy":[41],"Optimization":[42],"algorithm":[43],"(Abdolmaleki":[44],"et":[45,165],"al.":[46,166],"2020),":[47],"we":[48,168],"show":[49,214],"that":[50],"can":[53],"help":[54],"speed":[55,149],"up":[56],"learning,":[57],"particularly":[58],"absence":[61],"of":[62,94,108,121,145,154,179,217],"shaping":[63],"rewards.":[64],"In":[65,160],"two":[66],"domains":[67],"with":[68,172,221],"continuous":[69],"observation":[70],"and":[71,82,85,110,150],"action":[72],"spaces,":[73],"our":[74],"successfully":[76],"compose":[77,189],"sequence":[81],"parallel,":[84],"also":[87,169],"able":[88,186],"further":[90],"extend":[91],"teachers":[96,146],"order":[98],"solve":[100],"task.":[102,159],"Depending":[103],"on":[104,147,157,200],"specified":[106],"combination":[107],"teacher(s),":[111],"teacher(s)":[112],"may":[113],"naturally":[114],"act":[115],"limit":[117],"final":[119],"performance":[120,153],"an":[122],"agent.":[123],"extent":[125],"which":[127,140],"required":[130],"adhere":[132],"determined":[137],"hyperparameters":[139],"determine":[141],"both":[142],"effect":[144],"eventual":[152],"agent":[156],"humanoid":[162],"domain":[163],"(Tassa":[164],"2018),":[167],"equip":[170],"ability":[174],"control":[176],"selection":[178],"teachers.":[180],"With":[181],"this":[182],"ability,":[183],"meaningfully":[188],"from":[190],"achieve":[195],"superior":[197],"reward":[199],"walk":[202],"than":[204],"cases":[206],"without":[207],"access":[208],"resemblance":[216],"composed":[218],"corresponding":[223],"through":[226],"videos.":[227]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4386346085","counts_by_year":[],"updated_date":"2025-01-21T06:00:56.631067","created_date":"2023-09-01"}