{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T04:53:51Z","timestamp":1725684831902},"publisher-location":"Berlin, Heidelberg","reference-count":26,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642299452"},{"type":"electronic","value":"9783642299469"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-29946-9_12","type":"book-chapter","created":{"date-parts":[[2012,5,18]],"date-time":"2012-05-18T13:01:49Z","timestamp":1337346109000},"page":"89-101","source":"Crossref","is-referenced-by-count":7,"title":["\u21131-Penalized Projected Bellman Residual"],"prefix":"10.1007","author":[{"given":"Matthieu","family":"Geist","sequence":"first","affiliation":[]},{"given":"Bruno","family":"Scherrer","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"issue":"1","key":"12_CR1","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1007\/s10994-007-5038-2","volume":"71","author":"A. Antos","year":"2008","unstructured":"Antos, A., Szepesv\u00e1ri, C., Munos, R.: Learning near-optimal policies with Bellman-residual minimization based fitted policy iteration and a single sample path. Machine Learning\u00a071(1), 89\u2013129 (2008)","journal-title":"Machine Learning"},{"key":"12_CR2","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. Athena Scientific"},{"issue":"2-3","key":"12_CR3","first-page":"233","volume":"49","author":"J.A. Boyan","year":"1999","unstructured":"Boyan, J.A.: Technical Update: Least-Squares Temporal Difference Learning. Machine Learning\u00a049(2-3), 233\u2013246 (1999)","journal-title":"Machine Learning"},{"issue":"1-3","key":"12_CR4","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1007\/BF00114723","volume":"22","author":"S.J. Bradtke","year":"1996","unstructured":"Bradtke, S.J., Barto, A.G.: Linear Least-Squares algorithms for temporal difference learning. Machine Learning\u00a022(1-3), 33\u201357 (1996)","journal-title":"Machine Learning"},{"key":"12_CR5","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1137\/S1064827596304010","volume":"20","author":"S.S. Chen","year":"1999","unstructured":"Chen, S.S., Donoho, D.L., Saunders, M.A.: Atomic Decomposition by Basis Pursuit. SIAM Journal on Scientific Computing\u00a020, 33\u201361 (1999)","journal-title":"SIAM Journal on Scientific Computing"},{"issue":"2","key":"12_CR6","doi-asserted-by":"publisher","first-page":"407","DOI":"10.1214\/009053604000000067","volume":"32","author":"B. Efron","year":"2004","unstructured":"Efron, B., Hastie, T., Johnstone, I., Tibshirani, R.: Least Angle Regression. Annals of Statistics\u00a032(2), 407\u2013499 (2004)","journal-title":"Annals of Statistics"},{"key":"12_CR7","unstructured":"Farahmand, A., Ghavamzadeh, M., Szepesv\u00e1ri, C., Mannor, S.: Regularized policy iteration. In: 22nd Annual Conference on Neural Information Processing Systems (NIPS 21), Vancouver, Canada (2008)"},{"key":"12_CR8","unstructured":"Ghavamzadeh, M., Lazaric, A., Munos, R., Hoffman, M.: Finite-Sample Analysis of Lasso-TD. In: International Conference on Machine Learning (2011)"},{"key":"12_CR9","doi-asserted-by":"crossref","unstructured":"Hoffman, M.W., Lazaric, A., Ghavamzadeh, M., Munos, R.: Regularized least squares temporal difference learning with nested \u21132 and \u21131 penalization. In: European Workshop on Reinforcement Learning (2011)","DOI":"10.1007\/978-3-642-29946-9_13"},{"key":"12_CR10","doi-asserted-by":"publisher","first-page":"385","DOI":"10.1145\/1273496.1273545","volume-title":"Proceedings of the 24th International Conference on Machine Learning, ICML 2007","author":"J. Johns","year":"2007","unstructured":"Johns, J., Mahadevan, S.: Constructing basis functions from directed graphs for value function approximation. In: Proceedings of the 24th International Conference on Machine Learning, ICML 2007, pp. 385\u2013392. ACM, New York (2007)"},{"key":"12_CR11","unstructured":"Johns, J., Painter-Wakefield, C., Parr, R.: Linear Complementarity for Regularized Policy Evaluation and Improvement. In: Lafferty, J., Williams, C.K.I., Shawe-Taylor, J., Zemel, R.S., Culotta, A. (eds.) NIPS 23, pp. 1009\u20131017 (2010)"},{"key":"12_CR12","doi-asserted-by":"crossref","unstructured":"Kolter, J.Z., Ng, A.Y.: Regularization and Feature Selection in Least-Squares Temporal Difference Learning. In: Proceedings of the 26th International Conference on Machine Learning (ICML 2009), Montreal, Canada (2009)","DOI":"10.1145\/1553374.1553442"},{"key":"12_CR13","doi-asserted-by":"crossref","unstructured":"Loth, M., Davy, M., Preux, P.: Sparse Temporal Difference Learning using LASSO. In: IEEE International Symposium on Approximate Dynamic Programming and Reinforcement Learning, Hawa\u00ef, USA (2007)","DOI":"10.1109\/ADPRL.2007.368210"},{"key":"12_CR14","unstructured":"Munos, R.: Error bounds for approximate policy iteration. In: International Conference on Machine Learning (2003)"},{"key":"12_CR15","doi-asserted-by":"publisher","first-page":"752","DOI":"10.1145\/1390156.1390251","volume-title":"Proceedings of the 25th International Conference on Machine Learning, ICML 2008","author":"R. Parr","year":"2008","unstructured":"Parr, R., Li, L., Taylor, G., Painter-Wakefield, C., Littman, M.L.: An analysis of linear models, linear value-function approximation, and feature selection for reinforcement learning. In: Proceedings of the 25th International Conference on Machine Learning, ICML 2008, pp. 752\u2013759. ACM, New York (2008)"},{"key":"12_CR16","doi-asserted-by":"crossref","unstructured":"Petrik, M., Taylor, G., Parr, R., Zilberstein, S.: Feature Selection Using Regularization in Approximate Linear Programs for Markov Decision Processes. In: Proceedings of ICML (2010)","DOI":"10.1145\/1553374.1553478"},{"issue":"3","key":"12_CR17","doi-asserted-by":"publisher","first-page":"1012","DOI":"10.1214\/009053606000001370","volume":"35","author":"S. Rosset","year":"2007","unstructured":"Rosset, S., Zhu, J.: Piecewise linear regularized solution paths. The Annals of Statistics\u00a035(3), 1012\u20131030 (2007)","journal-title":"The Annals of Statistics"},{"key":"12_CR18","unstructured":"Scherrer, B.: Should one compute the Temporal Difference fix point or minimize the Bellman Residual? The unified oblique projection view. In: 27th International Conference on Machine Learning - ICML 2010, Ha\u00effa, Isra\u00ebl (2010)"},{"key":"12_CR19","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction (Adaptive Computation and Machine Learning). The MIT Press (1998)"},{"key":"12_CR20","doi-asserted-by":"crossref","first-page":"993","DOI":"10.1145\/1553374.1553501","volume-title":"Proceedings of ICML","author":"R.S. Sutton","year":"2009","unstructured":"Sutton, R.S., Maei, H.R., Precup, D., Bhatnagar, S., Silver, D., Szepesv\u00e1ri, C., Wiewiora, E.: Fast gradient-descent methods for temporal-difference learning with linear function approximation. In: Proceedings of ICML, pp. 993\u20131000. ACM, New York (2009)"},{"key":"12_CR21","doi-asserted-by":"crossref","unstructured":"Szepesv\u00e1ri, C.: Algorithms for Reinforcement Learning. Morgan and Kaufmann (2010)","DOI":"10.2200\/S00268ED1V01Y201005AIM009"},{"key":"12_CR22","first-page":"1017","volume-title":"Proceedings of the 26th Annual International Conference on Machine Learning, ICML 2009","author":"G. Taylor","year":"2009","unstructured":"Taylor, G., Parr, R.: Kernelized value function approximation for reinforcement learning. In: Proceedings of the 26th Annual International Conference on Machine Learning, ICML 2009, pp. 1017\u20131024. ACM, New York (2009)"},{"key":"12_CR23","first-page":"3","volume":"32","author":"C. Thiery","year":"2009","unstructured":"Thiery, C., Scherrer, B.: Building Controllers for Tetris. International Computer Games Association Journal\u00a032, 3\u201311 (2009)","journal-title":"International Computer Games Association Journal"},{"issue":"1","key":"12_CR24","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1111\/j.2517-6161.1996.tb02080.x","volume":"58","author":"R. Tibshirani","year":"1996","unstructured":"Tibshirani, R.: Regression Shrinkage and Selection via the Lasso. Journal of the Royal Statistical Society. Series B (Methodological)\u00a058(1), 267\u2013288 (1996)","journal-title":"Journal of the Royal Statistical Society. Series B (Methodological)"},{"issue":"476","key":"12_CR25","doi-asserted-by":"publisher","first-page":"1418","DOI":"10.1198\/016214506000000735","volume":"101","author":"H. Zou","year":"2006","unstructured":"Zou, H.: The adaptive lasso and its oracle properties. Journal of the American Statistical Association\u00a0101(476), 1418\u20131429 (2006)","journal-title":"Journal of the American Statistical Association"},{"issue":"4","key":"12_CR26","doi-asserted-by":"publisher","first-page":"1733","DOI":"10.1214\/08-AOS625","volume":"37","author":"H. Zou","year":"2009","unstructured":"Zou, H., Zhang, H.H.: On the adaptive elastic-net with a diverging number of parameters. The Annals of Statistics\u00a037(4), 1733\u20131751 (2009)","journal-title":"The Annals of Statistics"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-29946-9_12.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,22]],"date-time":"2023-06-22T16:53:35Z","timestamp":1687452815000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-29946-9_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642299452","9783642299469"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-29946-9_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}