{"id":"https://openalex.org/W4391591347","doi":"https://doi.org/10.48550/arxiv.2402.02665","title":"Utility-Based Reinforcement Learning: Unifying Single-objective and\n Multi-objective Reinforcement Learning","display_name":"Utility-Based Reinforcement Learning: Unifying Single-objective and\n Multi-objective Reinforcement Learning","publication_year":2024,"publication_date":"2024-02-04","ids":{"openalex":"https://openalex.org/W4391591347","doi":"https://doi.org/10.48550/arxiv.2402.02665"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2402.02665","pdf_url":"https://arxiv.org/pdf/2402.02665","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2402.02665","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080095540","display_name":"Peter Vamplew","orcid":"https://orcid.org/0000-0002-8687-4424"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vamplew, Peter","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013402425","display_name":"Cameron Foale","orcid":"https://orcid.org/0000-0003-2537-0326"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Foale, Cameron","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014572669","display_name":"Conor F. Hayes","orcid":"https://orcid.org/0000-0003-4783-7126"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hayes, Conor F.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046330057","display_name":"Patrick Mannion","orcid":"https://orcid.org/0000-0002-7951-878X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mannion, Patrick","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074452240","display_name":"Enda Howley","orcid":"https://orcid.org/0000-0003-2687-4630"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Howley, Enda","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032749222","display_name":"Richard Dazeley","orcid":"https://orcid.org/0000-0002-6199-9685"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dazeley, Richard","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113436401","display_name":"Scott Fitzgerald Johnson","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Johnson, Scott","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085101949","display_name":"Johan K\u00e4llstr\u00f6m","orcid":"https://orcid.org/0000-0002-4144-4893"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"K\u00e4llstr\u00f6m, Johan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000440246","display_name":"Gabriel de Oliveira Ramos","orcid":"https://orcid.org/0000-0002-6488-7654"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ramos, Gabriel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008556334","display_name":"Roxana R\u0103dulescu","orcid":"https://orcid.org/0000-0003-1446-5514"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"R\u0103dulescu, Roxana","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058224907","display_name":"Willem R\u00f6pke","orcid":"https://orcid.org/0000-0001-5045-6127"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"R\u00f6pke, Willem","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5081436755","display_name":"Diederik M. Roijers","orcid":"https://orcid.org/0000-0002-2825-2491"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roijers, Diederik M.","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":77},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10539","display_name":"Sustainable Supply Chain Management","score":0.6412,"subfield":{"id":"https://openalex.org/subfields/1408","display_name":"Strategy and Management"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10539","display_name":"Sustainable Supply Chain Management","score":0.6412,"subfield":{"id":"https://openalex.org/subfields/1408","display_name":"Strategy and Management"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10328","display_name":"Supply Chain and Inventory Management","score":0.596,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/error-driven-learning","display_name":"Error-driven learning","score":0.43922228}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8543265},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.7529105},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4565638},{"id":"https://openalex.org/C47932503","wikidata":"https://www.wikidata.org/wiki/Q5395689","display_name":"Error-driven learning","level":3,"score":0.43922228},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40934393},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.22833171},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.09040898}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2402.02665","pdf_url":"https://arxiv.org/pdf/2402.02665","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2402.02665","pdf_url":"https://arxiv.org/pdf/2402.02665","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W87513465","https://openalex.org/W8539471","https://openalex.org/W4312372616","https://openalex.org/W3203256658","https://openalex.org/W2786230833","https://openalex.org/W2391666574","https://openalex.org/W2371091044","https://openalex.org/W2352650970","https://openalex.org/W2171010636","https://openalex.org/W1493952344"],"abstract_inverted_index":{"Research":[0],"in":[1],"multi-objective":[2],"reinforcement":[3,44],"learning":[4,45,58],"(MORL)":[5],"has":[6],"introduced":[7],"the":[8,23,27,40,53,74],"utility-based":[9,80],"paradigm,":[10],"which":[11],"makes":[12],"use":[13],"of":[14,42,77],"both":[15],"environmental":[16],"rewards":[17],"and":[18,47,68],"a":[19,79],"function":[20],"that":[21],"defines":[22],"utility":[24],"derived":[25],"by":[26],"user":[28],"from":[29],"those":[30],"rewards.":[31],"In":[32],"this":[33,37],"paper":[34],"we":[35],"extend":[36],"paradigm":[38],"to":[39,55,62],"context":[41],"single-objective":[43],"(RL),":[46],"outline":[48],"multiple":[49],"potential":[50],"benefits":[51],"including":[52],"ability":[54],"perform":[56],"multi-policy":[57],"across":[59],"tasks":[60],"relating":[61],"uncertain":[63],"objectives,":[64],"risk-aware":[65],"RL,":[66],"discounting,":[67],"safe":[69],"RL.":[70],"We":[71],"also":[72],"examine":[73],"algorithmic":[75],"implications":[76],"adopting":[78],"approach.":[81]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4391591347","counts_by_year":[],"updated_date":"2025-04-18T23:02:53.616692","created_date":"2024-02-07"}