{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,6,23]],"date-time":"2024-06-23T05:51:34Z","timestamp":1719121894075},"reference-count":47,"publisher":"Elsevier BV","issue":"1-2","license":[{"start":{"date-parts":[[1998,4,1]],"date-time":"1998-04-01T00:00:00Z","timestamp":891388800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2013,7,25]],"date-time":"2013-07-25T00:00:00Z","timestamp":1374710400000},"content-version":"vor","delay-in-days":5594,"URL":"https:\/\/www.elsevier.com\/open-access\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Artificial Intelligence"],"published-print":{"date-parts":[[1998,4]]},"DOI":"10.1016\/s0004-3702(98)00002-2","type":"journal-article","created":{"date-parts":[[2002,7,25]],"date-time":"2002-07-25T12:04:40Z","timestamp":1027598680000},"page":"177-224","source":"Crossref","is-referenced-by-count":54,"title":["Model-based average reward reinforcement learning"],"prefix":"10.1016","volume":"100","author":[{"given":"Prasad","family":"Tadepalli","sequence":"first","affiliation":[]},{"given":"DoKyeong","family":"Ok","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/S0004-3702(98)00002-2_BIB1","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1007\/BF00153759","article-title":"Instance-based learning algorithms","volume":"6","author":"Aha","year":"1991","journal-title":"Machine Learning"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB2","doi-asserted-by":"crossref","first-page":"11","DOI":"10.1023\/A:1006559212014","article-title":"Locally weighted learning","volume":"11","author":"Atkeson","year":"1997","journal-title":"Artificial Intelligence Review"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB3","doi-asserted-by":"crossref","first-page":"81","DOI":"10.1016\/0004-3702(94)00011-O","article-title":"Learning to act using real-time dynamic programming","volume":"73","author":"Barto","year":"1995","journal-title":"Artificial Intelligence"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB4","article-title":"A new value-iteration method for the average cost dynamic programming problem","author":"Bertsekas","year":"1995"},{"issue":"3","key":"10.1016\/S0004-3702(98)00002-2_BIB5","doi-asserted-by":"crossref","DOI":"10.1109\/TAC.1982.1102980","article-title":"Distributed dynamic programming","volume":"27","author":"Bertsekas","year":"1982","journal-title":"IEEE Trans. 
Automatic Control"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB6","series-title":"Dynamic Programming and Optimal Control","author":"Bertsekas","year":"1995"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB7","series-title":"Proceedings 14th International Joint Conference on Artificial Intelligence (IJCAI-95)","article-title":"Exploiting structure in policy construction","author":"Boutilier","year":"1995"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB8","series-title":"Proceedings Neural Information Processing Systems","article-title":"Generalizing reinforcement learning: safely approximating the value function","author":"Boyan","year":"1994"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB9","series-title":"Classification and Regression Trees","author":"Brieman","year":"1984"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB10","series-title":"Applied Probability and Statistical Methods","author":"Canavos","year":"1984"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB11","article-title":"Improving elevator performance using reinforcement learning","volume":"Vol. 8","author":"Crites","year":"1996"},{"issue":"3","key":"10.1016\/S0004-3702(98)00002-2_BIB12","doi-asserted-by":"crossref","first-page":"142","DOI":"10.1111\/j.1467-8640.1989.tb00324.x","article-title":"A model for reasoning about persistence and causation","volume":"5","author":"Dean","year":"1989","journal-title":"Computational Intelligence"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB13","doi-asserted-by":"crossref","first-page":"431","DOI":"10.1109\/TIT.1972.1054809","article-title":"The reduced nearest neighbor rule","author":"Gates","year":"1972","journal-title":"IEEE Trans. Inform. Theory"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB14","doi-asserted-by":"crossref","first-page":"515","DOI":"10.1109\/TIT.1968.1054155","article-title":"The condensed nearest neighbor rule","volume":"14","author":"Hart","year":"1968","journal-title":"IEEE Trans. Inform. Theory"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB15","series-title":"Dynamic Programming and Markov Processes","author":"Howard","year":"1960"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB16","series-title":"IEEE Proceedings 28th Conference on Decision and Control","article-title":"Computationally efficient adaptive control algorithms for markov chains","author":"Jalali","year":"1989"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB17","series-title":"Learning in Embedded Systems","author":"Kaelbling","year":"1990"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB18","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","article-title":"Reinforcement learning: a survey","volume":"4","author":"Kaelbling","year":"1996","journal-title":"J. 
Artificial Intelligence Research"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB19","series-title":"Proceedings 8th Conference on Uncertainty in Artificial Intelligence","first-page":"121","article-title":"A computational scheme for reasoning in dynamic probabilistic networks","author":"Kjaerulff","year":"1992"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB20","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1007\/BF00114729","article-title":"The effect of representation and knowledge on goal-directed exploration with reinforcement-learning algorithms","volume":"22","author":"Koenig","year":"1996","journal-title":"Machine Learning"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB21","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1007\/BF00992699","article-title":"Self-improving reactive agents based on reinforcement learning, planning, and teaching","volume":"8","author":"Lin","year":"1992","journal-title":"Machine Learning"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB22","series-title":"Proceedings of International Machine Learning Conference","first-page":"362","article-title":"Learning policies for partially observable environments: scaling up","author":"Littman","year":"1995"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB23","series-title":"Proceedings National Conference on Artificial Intelligence (AAAI-96)","article-title":"An average reward reinforcement learning algorithm for computing bias-optimal policies","author":"Mahadevan","year":"1996"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB24","doi-asserted-by":"crossref","first-page":"159","DOI":"10.1007\/BF00114727","article-title":"Average reward reinforcement learning: foundations, algorithms, and empirical results","volume":"22","author":"Mahadevan","year":"1996","journal-title":"Machine Learning"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB25","series-title":"Proceedings International Machine Learning Conference","article-title":"Sensitive discount optimality: Unifying discounted and average reward reinforcement learning","author":"Mahadevan","year":"1996"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB26","doi-asserted-by":"crossref","first-page":"311","DOI":"10.1016\/0004-3702(92)90058-6","article-title":"Automatic programming of behavior-based robots using reinforcement learning","volume":"55","author":"Mahadevan","year":"1992","journal-title":"Artificial Intelligence"},{"issue":"2","key":"10.1016\/S0004-3702(98)00002-2_BIB27","first-page":"114","article-title":"Design of automatic guided vehicle systems","volume":"14","author":"Maxwell","year":"1982","journal-title":"Institute of Industrial Engineers Trans."},{"key":"10.1016\/S0004-3702(98)00002-2_BIB28","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1007\/BF00993104","article-title":"Prioritized sweeping: Reinforcement learning with less data and less time","volume":"13","author":"Moore","year":"1993","journal-title":"Machine Learning J."},{"key":"10.1016\/S0004-3702(98)00002-2_BIB29","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1023\/A:1006511328852","article-title":"Locally weighted learning for control","volume":"11","author":"Moore","year":"1997","journal-title":"Artificial Intelligence Review"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB30","series-title":"ECAI 92: 10th European Conference on Artificial Intelligence Proceedings","first-page":"689","article-title":"The data association problem when monitoring robot vehicles using dynamic belief 
networks","author":"Nicholson","year":"1992"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB31","article-title":"A study of model-based average reward reinforcement learning","author":"Ok","year":"1996"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB32","series-title":"Proceedings National Conference on Artificial Intelligence (AAAI-96)","article-title":"Auto-exploratory average reward reinforcement learning","author":"Ok","year":"1996"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB33","series-title":"Proceedings National Conference on Artificial Intelligence (AAAI-94)","first-page":"1088","article-title":"Approximating optimal policies for partially observable stochastic domains","author":"Parr","year":"1994"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB34","series-title":"Markov Decision Processes: Discrete Dynamic Stochastic Programming","author":"Puterman","year":"1994"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB35","series-title":"Artificial Intelligence: A Modern Approach","author":"Russell","year":"1995"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB36","first-page":"57","article-title":"Robot juggling: an implementation of memory-based learning","volume":"Vol. 14","author":"Schaal","year":"1994"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB37","series-title":"Proceedings 10th International Conference on Machine Learning","article-title":"A reinforcement learning method for maximizing undiscounted rewards","author":"Schwartz","year":"1993"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB38","series-title":"Proceedings National Conference on Artificial Intelligence (AAAI-94)","article-title":"Reinforcement learning algorithms for average-payoff markovian decision processes","author":"Singh","year":"1994"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB39","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1007\/BF00115009","article-title":"Learning to predict by the methods of temporal differences","volume":"3","author":"Sutton","year":"1988","journal-title":"Machine Learning"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB40","series-title":"Proceedings Seventh International Conference on Machine Learning","article-title":"Integrating architectures for learning, planning and reacting based on approximating dynamic programming","author":"Sutton","year":"1990"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB41","article-title":"H-learning: A reinforcement learning method for optimizing undiscounted average reward","author":"Tadepalli","year":"1994"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB42","series-title":"Proceedings 13th International Conference on Machine Learning","article-title":"Scaling up average reward reinforcement learning by approximating the domain models and the value function","author":"Tadepalli","year":"1996"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB43","series-title":"Proceedings 10th International Conference on Machine Learning","article-title":"Multi-agent reinforcement learning: independent vs. 
cooperative agents","author":"Tan","year":"1993"},{"issue":"3\u20134","key":"10.1016\/S0004-3702(98)00002-2_BIB44","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1007\/BF00992697","article-title":"Practical issues in temporal difference learning","volume":"8","author":"Tesauro","year":"1992","journal-title":"Machine Learning"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB45","article-title":"The role of exploration in learning control","author":"Thrun","year":"1994"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB46","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","article-title":"Q-learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Machine Learning"},{"key":"10.1016\/S0004-3702(98)00002-2_BIB47","series-title":"Proceedings 14th International Joint Conference on Artificial Intelligence (IJCAI-95)","article-title":"A reinforcement learning approach to job-shop scheduling","author":"Zhang","year":"1995"}],"container-title":["Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0004370298000022?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0004370298000022?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2020,1,10]],"date-time":"2020-01-10T18:51:02Z","timestamp":1578682262000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0004370298000022"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1998,4]]},"references-count":47,"journal-issue":{"issue":"1-2","published-print":{"date-parts":[[1998,4]]}},"alternative-id":["S0004370298000022"],"URL":"https:\/\/doi.org\/10.1016\/s0004-3702(98)00002-2","relation":{},"ISSN":["0004-3702"],"issn-type":[{"value":"0004-3702","type":"print"}],"subject":[],"published":{"date-parts":[[1998,4]]}}}
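
The record above is a work object from the public Crossref REST API (the "status"/"message" envelope and field names such as "title", "author", "container-title", and "page" are Crossref's standard schema). As a minimal sketch of how such a record could be fetched and parsed, assuming network access and using only the DOI shown in the record:

    import json
    import urllib.request

    # DOI of the work record shown above (Tadepalli & Ok, Artificial Intelligence 100, 1998).
    DOI = "10.1016/s0004-3702(98)00002-2"

    # The Crossref REST API serves work metadata at /works/{doi}; the response
    # uses the same {"status": ..., "message": {...}} envelope as the record above.
    with urllib.request.urlopen(f"https://api.crossref.org/works/{DOI}") as resp:
        work = json.load(resp)["message"]

    # "title" and "container-title" are arrays; each author has "given"/"family" parts.
    authors = ", ".join(f'{a["given"]} {a["family"]}' for a in work["author"])
    print(f'{authors}. "{work["title"][0]}". {work["container-title"][0]} '
          f'{work["volume"]}({work["issue"]}), pp. {work["page"]}.')

Run against this DOI, the sketch would print a one-line citation assembled from the same fields visible in the record: Prasad Tadepalli, DoKyeong Ok. "Model-based average reward reinforcement learning". Artificial Intelligence 100(1-2), pp. 177-224.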