{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T13:59:15Z","timestamp":1725803955036},"publisher-location":"Berlin, Heidelberg","reference-count":25,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783662448441"},{"type":"electronic","value":"9783662448458"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-662-44845-8_3","type":"book-chapter","created":{"date-parts":[[2014,9,1]],"date-time":"2014-09-01T12:51:33Z","timestamp":1409575893000},"page":"35-50","source":"Crossref","is-referenced-by-count":3,"title":["Local Policy Search in a Convex Space and Conservative Policy Iteration as Boosted Policy Search"],"prefix":"10.1007","author":[{"given":"Bruno","family":"Scherrer","sequence":"first","affiliation":[]},{"given":"Matthieu","family":"Geist","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"3_CR1","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1007\/s10994-007-5038-2","volume":"71","author":"A. Antos","year":"2008","unstructured":"Antos, A., Szepesvari, C., Munos, R.: Learning near-optimal policies with Bellman-residual minimization based fitted policy iteration and a single sample path. Machine Learning Journal\u00a071, 89\u2013129 (2008)","journal-title":"Machine Learning Journal"},{"key":"3_CR2","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1057\/jors.1995.50","volume":"46","author":"T. Archibald","year":"1995","unstructured":"Archibald, T., McKinnon, K., Thomas, L.: On the Generation of Markov Decision Processes. Journal of the Operational Research Society\u00a046, 354\u2013361 (1995)","journal-title":"Journal of the Operational Research Society"},{"key":"3_CR3","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1613\/jair.806","volume":"15","author":"J. Baxter","year":"2001","unstructured":"Baxter, J., Bartlett, P.L.: Infinite-horizon gradient-based policy search. Journal of Artificial Intelligence Research (JAIR)\u00a015, 319\u2013350 (2001)","journal-title":"Journal of Artificial Intelligence Research (JAIR)"},{"key":"3_CR4","unstructured":"Bertsekas, D., Tsitsiklis, J.: Neuro-Dynamic Programming. Athena Scientific (1996)"},{"key":"3_CR5","unstructured":"Bertsekas, D.P.: Dynamic Programming and Optimal Control. Athena Scientific (1995)"},{"key":"3_CR6","unstructured":"Bhatnagar, S., Sutton, R.S., Ghavamzadeh, M., Lee, M.: Incremental natural actor-critic algorithms. In: Advances in Neural Information Processing Systems, NIPS (2007)"},{"key":"3_CR7","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1613\/jair.1700","volume":"25","author":"A. Fern","year":"2006","unstructured":"Fern, A., Yoon, S., Givan, R.: Approximate Policy Iteration with a Policy Language Bias: Solving Relational Markov Decision Processes. Journal of Artificial Intelligence Research (JAIR)\u00a025, 75\u2013118 (2006)","journal-title":"Journal of Artificial Intelligence Research (JAIR)"},{"key":"3_CR8","unstructured":"Ghavamzadeh, M., Lazaric, A.: Conservative and Greedy Approaches to Classification-based Policy Iteration. In: Conference on Artificial Intelligence, AAAI (2012)"},{"key":"3_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"428","DOI":"10.1007\/978-3-540-87700-4_43","volume-title":"Parallel Problem Solving from Nature \u2013 PPSN X","author":"V. Heidrich-Meisner","year":"2008","unstructured":"Heidrich-Meisner, V., Igel, C.: Evolution strategies for direct policy search. In: Rudolph, G., Jansen, T., Lucas, S., Poloni, C., Beume, N. (eds.) PPSN 2008. LNCS, vol.\u00a05199, pp. 428\u2013437. Springer, Heidelberg (2008)"},{"key":"3_CR10","unstructured":"Kakade, S.: A Natural Policy Gradient. In: Advances in Neural Information Processing Systems, NIPS (2001)"},{"key":"3_CR11","unstructured":"Kakade, S., Langford, J.: Approximately optimal approximate reinforcement learning. In: International Conference on Machine Learning, ICML (2002)"},{"key":"3_CR12","doi-asserted-by":"crossref","unstructured":"Kober, J., Peters, J.: Policy Search for Motor Primitives in Robotics. Machine Learning pp. 171\u2013203 (2011)","DOI":"10.1007\/s10994-010-5223-6"},{"key":"3_CR13","first-page":"1107","volume":"4","author":"M. Lagoudakis","year":"2003","unstructured":"Lagoudakis, M., Parr, R.: Least-squares policy iteration. Journal of Machine Learning Research (JMLR)\u00a04, 1107\u20131149 (2003)","journal-title":"Journal of Machine Learning Research (JMLR)"},{"key":"3_CR14","unstructured":"Lagoudakis, M., Parr, R.: Reinforcement learning as classification: Leveraging modern classifiers. In: International Conference on Machine Learning, ICML (2003)"},{"key":"3_CR15","first-page":"3041","volume":"13","author":"A. Lazaric","year":"2011","unstructured":"Lazaric, A., Ghavamzadeh, M., Munos, R.: Finite-sample analysis of least-squares policy iteration. Journal of Machine Learning Research\u00a013, 3041\u20133074 (2011)","journal-title":"Journal of Machine Learning Research"},{"key":"3_CR16","unstructured":"Lazaric, A., Ghavamzadeh, M., Munos, R.: Analysis of a classification-based policy iteration algorithm. In: International Conference on Machine Learning, ICML (2010)"},{"key":"3_CR17","unstructured":"Mason, L., Baxter, J., Bartlett, P., Frean, M.: Boosting algorithms as gradient descent in function space. Tech. rep., Australian National University (1999)"},{"key":"3_CR18","unstructured":"Munos, R.: Error bounds for approximate policy iteration. In: International Conference on Machine Learning, ICML (2003)"},{"key":"3_CR19","doi-asserted-by":"crossref","unstructured":"Munos, R.: Performance bounds in Lp norm for approximate value iteration. SIAM Journal on Control and Optimization (2007)","DOI":"10.1137\/040614384"},{"key":"3_CR20","doi-asserted-by":"publisher","first-page":"1180","DOI":"10.1016\/j.neucom.2007.11.026","volume":"71","author":"J. Peters","year":"2008","unstructured":"Peters, J., Schaal, S.: Natural Actor-Critic. Neurocomputing\u00a071, 1180\u20131190 (2008)","journal-title":"Neurocomputing"},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley-Interscience (1994)","DOI":"10.1002\/9780470316887"},{"key":"3_CR22","unstructured":"Scherrer, B., Gabillon, V., Ghavamzadeh, M., Geist, M.: Approximate Modified Policy Iteration. In: International Conference on Machine Learning, ICML (2012)"},{"key":"3_CR23","unstructured":"Scherrer, B., Lesner, B.: On the Use of Non-Stationary Policies for Stationary Infinite-Horizon Markov Decision Processes. In: Advances in Neural Information Processing Systems, NIPS (2012)"},{"key":"3_CR24","doi-asserted-by":"crossref","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning, An introduction. The MIT Press (1998)","DOI":"10.1109\/TNN.1998.712192"},{"key":"3_CR25","unstructured":"Sutton, R.S., McAllester, D.A., Singh, S.P., Mansour, Y.: Policy Gradient Methods for Reinforcement Learning with Function Approximation. In: Advances in Neural Information Processing Systems, NIPS (1999)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-662-44845-8_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,15]],"date-time":"2019-09-15T00:16:14Z","timestamp":1568506574000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-662-44845-8_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783662448441","9783662448458"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-662-44845-8_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2014]]}}}