{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T07:31:44Z","timestamp":1725521504386},"publisher-location":"Berlin, Heidelberg","reference-count":11,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540897217"},{"type":"electronic","value":"9783540897224"}],"license":[{"start":{"date-parts":[[2008,1,1]],"date-time":"2008-01-01T00:00:00Z","timestamp":1199145600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2008]]},"DOI":"10.1007\/978-3-540-89722-4_3","type":"book-chapter","created":{"date-parts":[[2008,11,26]],"date-time":"2008-11-26T08:57:14Z","timestamp":1227689834000},"page":"27-40","source":"Crossref","is-referenced-by-count":5,"title":["Algorithms and Bounds for Rollout Sampling Approximate Policy Iteration"],"prefix":"10.1007","author":[{"given":"Christos","family":"Dimitrakakis","sequence":"first","affiliation":[]},{"given":"Michail G.","family":"Lagoudakis","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"issue":"2-3","key":"3_CR1","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P. Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Fischer, P.: Finite-time analysis of the multiarmed bandit problem. Machine Learning Journal\u00a047(2-3), 235\u2013256 (2002)","journal-title":"Machine Learning Journal"},{"key":"3_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"454","DOI":"10.1007\/978-3-540-72927-3_33","volume-title":"Learning Theory 2007","author":"P. Auer","year":"2007","unstructured":"Auer, P., Ortner, R., Szepesvari, C.: Improved Rates for the Stochastic Continuum-Armed Bandit Problem. In: Bshouty, N.H., Gentile, C. (eds.) COLT 2007. LNCS, vol.\u00a04539, pp. 454\u2013468. Springer, Heidelberg (2007)"},{"key":"#cr-split#-3_CR3.1","doi-asserted-by":"crossref","unstructured":"Bertsekas, D.: Dynamic programming and suboptimal control: From ADP to MPC. Fundamental Issues in Control, European Journal of Control??11(4-5) (2005);","DOI":"10.3166\/ejc.11.310-334"},{"key":"#cr-split#-3_CR3.2","unstructured":"From 2005 CDC, Seville, Spain"},{"key":"3_CR4","doi-asserted-by":"crossref","unstructured":"Dimitrakakis, C., Lagoudakis, M.: Rollout sampling approximate policy iteration. Machine Learning\u00a072(3) (September 2008)","DOI":"10.1007\/s10994-008-5069-3"},{"key":"3_CR5","first-page":"1079","volume":"7","author":"E. Even-Dar","year":"2006","unstructured":"Even-Dar, E., Mannor, S., Mansour, Y.: Action elimination and stopping conditions for the multi-armed bandit and reinforcement learning problems. Journal of Machine Learning Research\u00a07, 1079\u20131105 (2006)","journal-title":"Journal of Machine Learning Research"},{"key":"3_CR6","unstructured":"Fern, A., Yoon, S., Givan, R.: Approximate policy iteration with a policy language bias. Advances in Neural Information Processing Systems\u00a016(3) (2004)"},{"key":"3_CR7","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1613\/jair.1700","volume":"25","author":"A. Fern","year":"2006","unstructured":"Fern, A., Yoon, S., Givan, R.: Approximate policy iteration with a policy language bias: Solving relational Markov decision processes. Journal of Artificial Intelligence Research\u00a025, 75\u2013118 (2006)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"3_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"282","DOI":"10.1007\/11871842_29","volume-title":"Machine Learning: ECML 2006","author":"L. Kocsis","year":"2006","unstructured":"Kocsis, L., Szepesv\u00e1ri, C.: Bandit based Monte-Carlo planning. In: F\u00fcrnkranz, J., Scheffer, T., Spiliopoulou, M. (eds.) ECML 2006. LNCS, vol.\u00a04212, pp. 282\u2013293. Springer, Heidelberg (2006)"},{"key":"3_CR9","unstructured":"Lagoudakis, M.G., Parr, R.: Reinforcement learning as classification: Leveraging modern classifiers. In: Proceedings of the 20th International Conference on Machine Learning (ICML), Washington, DC, USA, pp. 424\u2013431 (August 2003)"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Langford, J., Zadrozny, B.: Relating reinforcement learning performance to classification performance. In: Proceedings of the 22nd International Conference on Machine learning (ICML), Bonn, Germany, pp. 473\u2013480 (2005)","DOI":"10.1145\/1102351.1102411"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-89722-4_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,3,4]],"date-time":"2019-03-04T00:01:38Z","timestamp":1551657698000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-89722-4_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008]]},"ISBN":["9783540897217","9783540897224"],"references-count":11,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-89722-4_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2008]]}}}