{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,7,7]],"date-time":"2024-07-07T23:45:01Z","timestamp":1720395901286},"reference-count":30,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2017,11,1]],"date-time":"2017-11-01T00:00:00Z","timestamp":1509494400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2017,11]]},"DOI":"10.1016\/j.neucom.2017.04.074","type":"journal-article","created":{"date-parts":[[2017,6,20]],"date-time":"2017-06-20T05:16:33Z","timestamp":1497935793000},"page":"39-47","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":14,"special_numbering":"C","title":["Identification and off-policy learning of multiple objectives using adaptive clustering"],"prefix":"10.1016","volume":"263","author":[{"ORCID":"http:\/\/orcid.org\/0000-0001-8918-3314","authenticated-orcid":false,"given":"Thommen George","family":"Karimpanal","sequence":"first","affiliation":[]},{"given":"Erik","family":"Wilhelm","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.neucom.2017.04.074_bib0001","series-title":"Reinforcement learning: An introduction","volume":"1","author":"Sutton","year":"1998"},{"key":"10.1016\/j.neucom.2017.04.074_bib0002","series-title":"Proceedings of International Conference on Machine Learning (ICML)","first-page":"417","article-title":"Off-policy temporal-difference learning with function approximation","author":"Precup","year":"2001"},{"key":"10.1016\/j.neucom.2017.04.074_bib0003","series-title":"Learning from delayed rewards, (Ph.D. thesis)","author":"Watkins","year":"1989"},{"issue":"1","key":"10.1016\/j.neucom.2017.04.074_bib0004","first-page":"289","article-title":"Off-policy learning with eligibility traces: a survey.","volume":"15","author":"Geist","year":"2014","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.neucom.2017.04.074_bib0005","series-title":"Proceedings of the 27th International Conference on Machine Learning (ICML-10)","first-page":"1207","article-title":"Convergence of least squares temporal difference methods under general conditions","author":"Yu","year":"2010"},{"issue":"Dec","key":"10.1016\/j.neucom.2017.04.074_bib0006","first-page":"1107","article-title":"Least-squares policy iteration","volume":"4","author":"Lagoudakis","year":"2003","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.neucom.2017.04.074_bib0007","first-page":"80","article-title":"Eligibility traces for o-policy policy evaluation","author":"Precup","year":"2000","journal-title":"Computer Science Department Faculty Publication Series"},{"key":"10.1016\/j.neucom.2017.04.074_bib0008","series-title":"Proceedings of the 3rd Conference on Artificial General Intelligence","first-page":"91","article-title":"Gq (\u03bb): a general gradient algorithm for temporal-difference prediction learning with eligibility traces","volume":"1","author":"Maei","year":"2010"},{"key":"10.1016\/j.neucom.2017.04.074_bib0009","series-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"Puterman","year":"1994"},{"issue":"1","key":"10.1016\/j.neucom.2017.04.074_bib0010","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1007\/BF00115009","article-title":"Learning to predict by the methods of temporal differences","volume":"3","author":"Sutton","year":"1988","journal-title":"Mach. Learn."},{"issue":"1\u20133","key":"10.1016\/j.neucom.2017.04.074_bib0011","doi-asserted-by":"crossref","first-page":"123","DOI":"10.1007\/BF00114726","article-title":"Reinforcement learning with replacing eligibility traces","volume":"22","author":"Singh","year":"1996","journal-title":"Mach. Learn."},{"key":"10.1016\/j.neucom.2017.04.074_bib0012","series-title":"Proceedings of the 10th International Conference on Autonomous Agents and Multiagent Systems-Volume 2","first-page":"761","article-title":"Horde: a scalable real-time architecture for learning knowledge from unsupervised sensorimotor interaction","author":"Sutton","year":"2011"},{"key":"10.1016\/j.neucom.2017.04.074_bib0013","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1613\/jair.3987","article-title":"A survey of multi-objective sequential decision-making.","volume":"48","author":"Roijers","year":"2013","journal-title":"J. Artif. Intell. Res. (JAIR)"},{"issue":"2","key":"10.1016\/j.neucom.2017.04.074_bib0014","doi-asserted-by":"crossref","first-page":"146","DOI":"10.1177\/1059712313511648","article-title":"Multi-timescale nexting in a reinforcement learning robot","volume":"22","author":"Modayil","year":"2014","journal-title":"Adapt. Behav."},{"key":"10.1016\/j.neucom.2017.04.074_bib0015","series-title":"Proceedings of IEEE International Conference on Development and Learning and Epigenetic Robotics (ICDL)","first-page":"1","article-title":"Scaling life-long off-policy learning","author":"White","year":"2012"},{"key":"10.1016\/j.neucom.2017.04.074_bib0016","series-title":"In Proceedings of the 15th International Conference on Machine Learning","first-page":"556","article-title":"Intra-option learning about temporally abstract actions","author":"Sutton","year":"1998"},{"issue":"1\u20132","key":"10.1016\/j.neucom.2017.04.074_bib0017","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","article-title":"Between MDPS and semi-MDPS: a framework for temporal abstraction in reinforcement learning","volume":"112","author":"Sutton","year":"1999","journal-title":"Artif. Intell."},{"key":"10.1016\/j.neucom.2017.04.074_bib0018","series-title":"Proceedings of the 21st International Conference on Machine Learning","first-page":"71","article-title":"Dynamic abstraction in reinforcement learning via clustering","author":"Mannor","year":"2004"},{"issue":"1","key":"10.1016\/j.neucom.2017.04.074_bib0019","first-page":"100","article-title":"Algorithm as 136: a k-means clustering algorithm","volume":"28","author":"Hartigan","year":"1979","journal-title":"J. R. Stat. Soc. Ser. C (Appl. Stat.)"},{"key":"10.1016\/j.neucom.2017.04.074_bib0020","series-title":"Cluster analysis for applications: probability and mathematical statistics: a series of monographs and textbooks","volume":"19","author":"Anderberg","year":"2014"},{"key":"10.1016\/j.neucom.2017.04.074_bib0021","series-title":"Proceedings of the Conference on Florida Artificial Intelligence Research Society (FLAIRS)","first-page":"695","article-title":"Adaptive k-means clustering.","author":"Bhatia","year":"2004"},{"key":"10.1016\/j.neucom.2017.04.074_bib0022","series-title":"Encyclopedia of Machine Learning and Data Mining","first-page":"1","article-title":"Adaptive resonance theory","author":"Carpenter","year":"2016"},{"key":"10.1016\/j.neucom.2017.04.074_bib0023","series-title":"Cluster analysis for researchers","author":"Romesburg","year":"1984"},{"key":"10.1016\/j.neucom.2017.04.074_bib0024","first-page":"1038","article-title":"Generalization in reinforcement learning: successful examples using sparse coarse coding","volume":"8","author":"Sutton","year":"1996","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2017.04.074_bib0025","doi-asserted-by":"crossref","first-page":"100","DOI":"10.1109\/TSSC.1968.300136","article-title":"A formal basis for the heuristic determination of minimum cost paths","volume":"4","author":"Hart","year":"1968","journal-title":"IEEE Trans. Syst. Sci. Cybern."},{"key":"10.1016\/j.neucom.2017.04.074_bib0026","series-title":"Adaptive Representations for Reinforcement Learning","author":"Whiteson","year":"2007"},{"key":"10.1016\/j.neucom.2017.04.074_bib0027","first-page":"1633","article-title":"Transfer learning for reinforcement learning domains: a survey","volume":"10","author":"Taylor","year":"2009","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.neucom.2017.04.074_bib0028","series-title":"Reinforcement Learning","first-page":"143","article-title":"Transfer in reinforcement learning: a framework and a survey","author":"Lazaric","year":"2012"},{"key":"10.1016\/j.neucom.2017.04.074_bib0029","series-title":"Proceedings of the 10th International Conference on Machine Learning","first-page":"330","article-title":"Multi-agent reinforcement learning: independent vs. cooperative agents","author":"Tan","year":"1993"},{"issue":"2","key":"10.1016\/j.neucom.2017.04.074_bib0030","doi-asserted-by":"crossref","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","article-title":"A comprehensive survey of multiagent reinforcement learning","volume":"38","author":"Busoniu","year":"2008","journal-title":"IEEE Trans. Systems Man Cybern. Part C Appl. Rev."}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231217311001?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231217311001?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2019,9,26]],"date-time":"2019-09-26T09:27:13Z","timestamp":1569490033000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231217311001"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,11]]},"references-count":30,"alternative-id":["S0925231217311001"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2017.04.074","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2017,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Identification and off-policy learning of multiple objectives using adaptive clustering","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2017.04.074","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2017 Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}]}}