{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T18:23:40Z","timestamp":1725560620288},"publisher-location":"Berlin, Heidelberg","reference-count":11,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540292432"},{"type":"electronic","value":"9783540316923"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2005]]},"DOI":"10.1007\/11564096_25","type":"book-chapter","created":{"date-parts":[[2005,11,9]],"date-time":"2005-11-09T11:54:27Z","timestamp":1131537267000},"page":"230-241","source":"Crossref","is-referenced-by-count":2,"title":["Towards Finite-Sample Convergence of Direct Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Shiau Hong","family":"Lim","sequence":"first","affiliation":[]},{"given":"Gerald","family":"DeJong","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"25_CR1","volume-title":"Neuro-Dynamic Programming","author":"D.P. Bertsekas","year":"1996","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. Athena Scientific, Belmont (1996)"},{"key":"25_CR2","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1162\/153244303765208377","volume":"3","author":"R.I. Brafman","year":"2002","unstructured":"Brafman, R.I., Tennenholtz, M.: R-max, A General Polynomial Time Algorithm for Near-Optimal Reinforcement Learning. Journal of Machine Learning Research\u00a03, 213\u2013231 (2002)","journal-title":"Journal of Machine Learning Research"},{"key":"25_CR3","first-page":"1","volume":"5","author":"E. Even-Dar","year":"2003","unstructured":"Even-Dar, E., Mansour, Y.: Learning rates for Q-Learning. Journal of Machine Learning Research\u00a05, 1\u201325 (2003)","journal-title":"Journal of Machine Learning Research"},{"key":"25_CR4","unstructured":"Kaelbling, L.: Learning in Embedded Systems. PhD thesis, Computer Science Department, Stanford University (1990)"},{"key":"25_CR5","first-page":"260","volume-title":"Proc. of 15th ICML","author":"M. Kearns","year":"1998","unstructured":"Kearns, M., Singh, S.: Near-Optimal Reinforcement Learning in Polynomial Time. In: Proc. of 15th ICML, pp. 260\u2013268. Morgan Kaufmann, San Francisco (1998)"},{"key":"25_CR6","first-page":"996","volume-title":"Advances in Neural Information Processing Systems 11","author":"M. Kearns","year":"1999","unstructured":"Kearns, M., Singh, S.: Finite-Sample Rates of Convergence for Q-Learning and Indirect Methods. In: Advances in Neural Information Processing Systems 11, pp. 996\u20131002. The MIT Press, Cambridge (1999)"},{"issue":"1\/3","key":"25_CR7","first-page":"227","volume":"22","author":"S. Koenig","year":"1996","unstructured":"Koenig, S., Simmons, R.G.: The Effect of Representation and Knowledge on Goal-Directed Exploration with Reinforcement Learning Algorithms. Machine Learning\u00a022(1\/3), 227\u2013250 (1996)","journal-title":"Machine Learning"},{"key":"25_CR8","unstructured":"Rummery, G.A., Niranjan, M.: On-line Q-learning using connectionist systems. Tech. Report CUED\/F-INFENG\/TR 166, Cambridge University Engineering Dept. (1994)"},{"key":"25_CR9","first-page":"1038","volume-title":"Advances in Neural Information Processing Systems 8","author":"R. Sutton","year":"1996","unstructured":"Sutton, R.: Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding. In: Advances in Neural Information Processing Systems 8, pp. 1038\u20131044. MIT Press, Cambridge (1996)"},{"key":"25_CR10","volume-title":"Reinforcement Learning","author":"R. Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning. MIT Press, Cambridge (1998)"},{"key":"25_CR11","unstructured":"Watkins, C.J.C.H.: Learning from Delayed Rewards. PhD thesis, Cambridge, England (1989)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning: ECML 2005"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/11564096_25.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,27]],"date-time":"2021-04-27T07:15:28Z","timestamp":1619507728000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/11564096_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2005]]},"ISBN":["9783540292432","9783540316923"],"references-count":11,"URL":"https:\/\/doi.org\/10.1007\/11564096_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2005]]}}}