{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,7,30]],"date-time":"2024-07-30T08:27:31Z","timestamp":1722328051212},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2021,3,12]],"date-time":"2021-03-12T00:00:00Z","timestamp":1615507200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,3,12]],"date-time":"2021-03-12T00:00:00Z","timestamp":1615507200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput & Applic"],"published-print":{"date-parts":[[2022,2]]},"DOI":"10.1007\/s00521-021-05859-1","type":"journal-article","created":{"date-parts":[[2021,3,12]],"date-time":"2021-03-12T10:02:37Z","timestamp":1615543357000},"page":"1783-1799","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["The impact of environmental stochasticity on value-based multiobjective reinforcement learning"],"prefix":"10.1007","volume":"34","author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-8687-4424","authenticated-orcid":false,"given":"Peter","family":"Vamplew","sequence":"first","affiliation":[]},{"given":"Cameron","family":"Foale","sequence":"additional","affiliation":[]},{"given":"Richard","family":"Dazeley","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,3,12]]},"reference":[{"key":"5859_CR1","unstructured":"Abels A, Roijers D, Lenaerts T, Now\u00e9 A, Steckelmacher D (2019) Dynamic weights in multi-objective deep reinforcement learning. In: International conference on machine learning (ICML), pp 11\u201320"},{"key":"5859_CR2","doi-asserted-by":"crossref","unstructured":"Barrett L, Narayanan S (2008) Learning all optimal policies with multiple criteria. In: ICML, pp 41\u201347","DOI":"10.1145\/1390156.1390162"},{"key":"5859_CR3","unstructured":"Bryce D, Cushing W, Kambhampati S (2007) Probabilistic planning is multi-objective. Arizona State University, Technical Report ASU-CSE-07-006"},{"key":"5859_CR4","doi-asserted-by":"crossref","unstructured":"Castelletti A, Galelli S, Restelli M, Soncini-Sessa R (2010) Tree-based reinforcement learning for optimal water reservoir operation. Water Resour Res 46(9)","DOI":"10.1029\/2009WR008898"},{"key":"5859_CR5","doi-asserted-by":"crossref","unstructured":"Debreu G (1997) On the preferences characterization of additively separable utility. In: Constructing scalar-valued objective functions. Springer, pp 25\u201338","DOI":"10.1007\/978-3-642-48773-6_3"},{"key":"5859_CR6","unstructured":"Di\u00a0Castro D, Tamar A, Mannor S (2012) Policy gradients with variance related risk criteria. In: ICML, pp 1651\u20131658"},{"key":"5859_CR7","first-page":"197","volume":"98","author":"Z G\u00e1bor","year":"1998","unstructured":"G\u00e1bor Z, Kalm\u00e1r Z, Szepesv\u00e1ri C (1998) Multi-criteria reinforcement learning. ICML 98:197\u2013205","journal-title":"ICML"},{"key":"5859_CR8","doi-asserted-by":"crossref","unstructured":"Geibel P (2006) Reinforcement learning for MDPs with constraints. In: European conference on machine learning (ECML). 
Springer, pp 646\u2013653","DOI":"10.1007\/11871842_63"},{"issue":"3","key":"5859_CR9","doi-asserted-by":"publisher","first-page":"352","DOI":"10.1007\/s10015-019-00523-3","volume":"24","author":"N Horie","year":"2019","unstructured":"Horie N, Matsui T, Moriyama K, Mutoh A, Inuzuka N (2019) Multi-objective safe reinforcement learning: the relationship between multi-objective reinforcement learning and safe reinforcement learning. Artif Life Robot 24(3):352\u2013359","journal-title":"Artif Life Robot"},{"key":"5859_CR10","doi-asserted-by":"crossref","unstructured":"Issabekov R, Vamplew P (2012) An empirical comparison of two common multiobjective reinforcement learning algorithms. In: Australasian joint conference on artificial intelligence (AJCAI). Springer, pp 626\u2013636","DOI":"10.1007\/978-3-642-35101-3_53"},{"key":"5859_CR11","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.neucom.2016.11.094","volume":"263","author":"S Parisi","year":"2017","unstructured":"Parisi S, Pirotta M, Peters J (2017) Manifold-based multi-objective policy search with sample reuse. Neurocomputing 263:3\u201314","journal-title":"Neurocomputing"},{"key":"5859_CR12","doi-asserted-by":"crossref","unstructured":"Perez J, Germain-Renaud C, K\u00e9gl B, Loomis C (2009) Responsive elastic computing. In: Proceedings of the 6th international conference industry session on Grids meets autonomic computing, pp 55\u201364","DOI":"10.1145\/1555301.1555311"},{"key":"5859_CR13","doi-asserted-by":"crossref","unstructured":"Pirotta M, Parisi S, Restelli M (2015) Multi-objective reinforcement learning with continuous Pareto frontier approximation. In: Twenty-ninth AAAI conference on artificial intelligence","DOI":"10.1613\/jair.4961"},{"key":"5859_CR14","unstructured":"R\u0103dulescu R, Mannion P, Roijers DM, Now\u00e9 A (2019) Equilibria in multi-objective games: a utility-based perspective. In: Proceedings of the adaptive and learning agents workshop (ALA-19) at AAMAS"},{"key":"5859_CR15","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1613\/jair.3987","volume":"48","author":"DM Roijers","year":"2013","unstructured":"Roijers DM, Vamplew P, Whiteson S, Dazeley R (2013a) A survey of multi-objective sequential decision-making. J. Artif. Intell. Res. 48:67\u2013113","journal-title":"J. Artif. Intell. Res."},{"key":"5859_CR16","doi-asserted-by":"crossref","unstructured":"Roijers DM, Whiteson S, Oliehoek FA (2013b) Computing convex coverage sets for multi-objective coordination graphs. In: International conference on algorithmic decision theory. Springer, pp 309\u2013323","DOI":"10.1007\/978-3-642-41575-3_24"},{"key":"5859_CR17","unstructured":"Roijers DM, Steckelmacher D, Now\u00e9 A (2018) Multi-objective reinforcement learning for the expected utility of the return. In: Adaptive learning agents (ALA) workshop at AAMAS, vol\u00a018"},{"key":"5859_CR18","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1016\/j.neucom.2016.10.100","volume":"263","author":"M Ruiz-Montiel","year":"2017","unstructured":"Ruiz-Montiel M, Mandow L, P\u00e9rez-de-la Cruz JL (2017) A temporal difference method for multi-objective reinforcement learning. Neurocomputing 263:15\u201325","journal-title":"Neurocomputing"},{"key":"5859_CR19","unstructured":"Shelton CR (2001) Importance sampling for reinforcement learning with multiple objectives. AI technical report 2001-003. 
MIT"},{"key":"5859_CR20","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: an introduction. MIT Press, Cambridge"},{"issue":"1","key":"5859_CR21","first-page":"361","volume":"17","author":"A Tamar","year":"2016","unstructured":"Tamar A, Di Castro D, Mannor S (2016) Learning the variance of the reward-to-go. J Mach Learn Res 17(1):361\u2013396","journal-title":"J Mach Learn Res"},{"key":"5859_CR22","doi-asserted-by":"crossref","unstructured":"Uchibe E, Doya K (2007) Constrained reinforcement learning from intrinsic and extrinsic rewards. In: 2007 IEEE 6th international conference on development and learning. IEEE, pp 163\u2013168","DOI":"10.1109\/DEVLRN.2007.4354030"},{"key":"5859_CR23","doi-asserted-by":"crossref","unstructured":"Vamplew P, Yearwood J, Dazeley R, Berry A (2008) On the limitations of scalarisation for multi-objective reinforcement learning of Pareto fronts. In: AJCAI. Springer, pp 372\u2013378","DOI":"10.1007\/978-3-540-89378-3_37"},{"key":"5859_CR24","doi-asserted-by":"crossref","unstructured":"Vamplew P, Dazeley R, Barker E, Kelarev A (2009) Constructing stochastic mixture policies for episodic multiobjective reinforcement learning tasks. In: AJCAI. Springer, pp 340\u2013349","DOI":"10.1007\/978-3-642-10439-8_35"},{"issue":"1\u20132","key":"5859_CR25","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1007\/s10994-010-5232-5","volume":"84","author":"P Vamplew","year":"2011","unstructured":"Vamplew P, Dazeley R, Berry A, Issabekov R, Dekker E (2011) Empirical evaluation methods for multiobjective reinforcement learning algorithms. Mach Learn 84(1\u20132):51\u201380","journal-title":"Mach Learn"},{"key":"5859_CR26","doi-asserted-by":"crossref","unstructured":"Vamplew P, Issabekov R, Dazeley R, Foale C (2015) Reinforcement learning of Pareto-optimal multiobjective policies using steering. In: AJCAI. Springer, pp 596\u2013608","DOI":"10.1007\/978-3-319-26350-2_53"},{"key":"5859_CR27","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1016\/j.neucom.2016.09.141","volume":"263","author":"P Vamplew","year":"2017","unstructured":"Vamplew P, Dazeley R, Foale C (2017) Softmax exploration strategies for multiobjective reinforcement learning. Neurocomputing 263:74\u201386","journal-title":"Neurocomputing"},{"key":"5859_CR28","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1016\/j.neucom.2016.08.152","volume":"263","author":"P Vamplew","year":"2017","unstructured":"Vamplew P, Issabekov R, Dazeley R, Foale C, Berry A, Moore T, Creighton D (2017) Steering approaches to Pareto-optimal multiobjective reinforcement learning. Neurocomputing 263:26\u201338","journal-title":"Neurocomputing"},{"key":"5859_CR29","doi-asserted-by":"publisher","first-page":"326","DOI":"10.1016\/j.neucom.2018.06.066","volume":"314","author":"P Vamplew","year":"2018","unstructured":"Vamplew P, Dazeley R, Foale C, Choudhury T (2018) Non-functional regression: a new challenge for neural networks. Neurocomputing 314:326\u2013335","journal-title":"Neurocomputing"},{"issue":"1","key":"5859_CR30","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1007\/s10676-017-9440-6","volume":"20","author":"P Vamplew","year":"2018","unstructured":"Vamplew P, Dazeley R, Foale C, Firmin S, Mummery J (2018) Human-aligned artificial intelligence is a multiobjective problem. 
Ethics Inform Technol 20(1):27\u201340","journal-title":"Ethics Inform Technol"},{"key":"5859_CR31","doi-asserted-by":"crossref","unstructured":"Vamplew P, Foale C, Dazeley R, Bignold A (2021) Potential-based multiobjective reinforcement learning approaches to low-impact agents for AI safety. Eng Appl Artif Intell 100","DOI":"10.1016\/j.engappai.2021.104186"},{"issue":"1","key":"5859_CR32","first-page":"3483","volume":"15","author":"K Van Moffaert","year":"2014","unstructured":"Van Moffaert K, Now\u00e9 A (2014) Multi-objective reinforcement learning using sets of Pareto dominating policies. J Mach Learn Res 15(1):3483\u20133512","journal-title":"J Mach Learn Res"},{"key":"5859_CR33","doi-asserted-by":"crossref","unstructured":"Van\u00a0Moffaert K, Drugan MM, Now\u00e9 A (2013a) Hypervolume-based multi-objective reinforcement learning. In: International conference on evolutionary multi-criterion optimization. Springer, pp 352\u2013366","DOI":"10.1007\/978-3-642-37140-0_28"},{"key":"5859_CR34","doi-asserted-by":"crossref","unstructured":"Van\u00a0Moffaert K, Drugan MM, Now\u00e9 A (2013b) Scalarized multi-objective reinforcement learning: Novel design techniques. In: 2013 IEEE symposium on adaptive dynamic programming and reinforcement learning (ADPRL). IEEE, pp 191\u2013199","DOI":"10.1109\/ADPRL.2013.6615007"},{"key":"5859_CR35","doi-asserted-by":"crossref","unstructured":"Van\u00a0Moffaert K, Brys T, Now\u00e9 A (2015) Risk-sensitivity through multi-objective reinforcement learning. In: 2015 IEEE congress on evolutionary computation (CEC). IEEE, pp 1746\u20131753","DOI":"10.1109\/CEC.2015.7257098"},{"issue":"2","key":"5859_CR36","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1016\/0022-247X(82)90122-6","volume":"89","author":"D White","year":"1982","unstructured":"White D (1982) Multi-objective infinite-horizon discounted Markov decision processes. J Math Anal Appl 89(2):639\u2013647","journal-title":"J Math Anal Appl"},{"key":"5859_CR37","doi-asserted-by":"crossref","unstructured":"Wiering MA, De\u00a0Jong ED (2007) Computing optimal stationary policies for multi-objective Markov decision processes. In: ADPRL. IEEE, pp 158\u2013165","DOI":"10.1109\/ADPRL.2007.368183"},{"key":"5859_CR38","doi-asserted-by":"crossref","unstructured":"Wiering MA, Withagen M, Drugan MM (2014) Model-based multi-objective reinforcement learning. In: ADPRL. IEEE, pp 1\u20136","DOI":"10.1109\/ADPRL.2014.7010622"},{"key":"5859_CR39","doi-asserted-by":"crossref","unstructured":"Yamaguchi T, Nagahama S, Ichikawa Y, Takadama K (2019) Model-based multi-objective reinforcement learning with unknown weights. In: International conference on human-computer interaction. Springer, pp 311\u2013321","DOI":"10.1007\/978-3-030-22649-7_25"},{"key":"5859_CR40","unstructured":"Zintgraf LM, Kanters TV, Roijers DM, Oliehoek F, Beau P (2015) Quality assessment of MORL algorithms: a utility-based approach. 
In: Benelearn 2015: proceedings of the 24th annual machine learning conference of Belgium and the Netherlands"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-021-05859-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-021-05859-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-021-05859-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,2,4]],"date-time":"2022-02-04T18:09:50Z","timestamp":1643998190000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-021-05859-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,3,12]]},"references-count":40,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2022,2]]}},"alternative-id":["5859"],"URL":"https:\/\/doi.org\/10.1007\/s00521-021-05859-1","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,3,12]]},"assertion":[{"value":"14 October 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 February 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 March 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}
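The record above is a Crossref work record: a status/message envelope wrapping the article metadata (title, authors, DOI, license, ISSN, and the deposited reference list). As a minimal sketch of how such a record could be retrieved and a few of its fields read, the following assumes the public Crossref REST API endpoint https://api.crossref.org/works/{DOI} and the third-party Python `requests` package; the field names simply mirror the JSON shown in this record and may not all be present for other DOIs.

# Minimal sketch: fetch and inspect a Crossref work record like the one above.
# Assumes the public Crossref REST API and the `requests` package are available;
# field names mirror the JSON in this record.
import requests

DOI = "10.1007/s00521-021-05859-1"

resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
resp.raise_for_status()
work = resp.json()["message"]  # the "message" object holds the work metadata

# A few fields present in the record above.
print(work["title"][0])                    # article title
print(work["container-title"][0])          # journal name
print(work.get("volume"), work.get("issue"))
print(len(work.get("reference", [])), "references deposited")

# Authors are a list of objects with "given" / "family" (and sometimes "ORCID").
for author in work.get("author", []):
    print(author.get("given", ""), author.get("family", ""))

Reading only through the "message" object, and using .get() for optional fields, keeps the sketch tolerant of records that omit parts of the metadata (e.g. works with no deposited references).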