{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,3,15]],"date-time":"2024-03-15T09:21:15Z","timestamp":1710494475826},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"2-3","license":[{"start":{"date-parts":[[2015,7,14]],"date-time":"2015-07-14T00:00:00Z","timestamp":1436832000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2015,9]]},"DOI":"10.1007\/s10994-015-5518-8","type":"journal-article","created":{"date-parts":[[2015,7,13]],"date-time":"2015-07-13T18:39:43Z","timestamp":1436812783000},"page":"655-676","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Regularized feature selection in reinforcement learning"],"prefix":"10.1007","volume":"100","author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-7336-840X","authenticated-orcid":false,"given":"Dean S.","family":"Wookey","sequence":"first","affiliation":[]},{"given":"George D.","family":"Konidaris","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,7,14]]},"reference":[{"key":"5518_CR1","doi-asserted-by":"crossref","first-page":"139","DOI":"10.1023\/A:1026289910256","volume":"18","author":"N Benoudjit","year":"2003","unstructured":"Benoudjit, N., & Verleysen, M. (2003). On the kernel widths in radial-basis function networks. Neural Processing Letters, 18, 139\u2013154.","journal-title":"Neural Processing Letters"},{"issue":"1","key":"5518_CR2","first-page":"33","volume":"22","author":"S Bradtke","year":"1996","unstructured":"Bradtke, S., & Barto, A. (1996). Linear least-squares algorithms for temporal difference learning. Machine Learning, 22(1), 33\u201357.","journal-title":"Machine Learning"},{"key":"5518_CR3","unstructured":"Dabney, W., & Barto, A. (2012). Adaptive step-size for online temporal difference learning. In Twenty-Sixth AAAI Conference on Artificial Intelligence (pp. 872\u2013878)."},{"key":"5518_CR4","unstructured":"Ernst, D., Geurts, P., & Wehenkel, L. (2005). Tree-based batch mode reinforcement learning. Journal of Machine Learning Research, 6, 503\u2013556."},{"key":"5518_CR5","unstructured":"Geist, M., Scherrer, B., Lazaric, A., & Ghavamzadeh, M. (2012). A Dantzig selector approach to temporal difference learning. In Proceedings of the 29th international conference on machine learning (pp. 1399\u20131406)."},{"key":"5518_CR6","unstructured":"Geramifard, A. (2013). Rc-car domain. http:\/\/acl.mit.edu\/RLPy\/api\/domains_misc.html#rccar"},{"key":"5518_CR7","first-page":"721","volume":"23","author":"M Ghavamzadeh","year":"2010","unstructured":"Ghavamzadeh, M., Lazaric, A., Maillard, O., & Munos, R. (2010). LSTD with random projections. Advances in Neural Information Processing Systems, 23, 721\u2013729.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"5518_CR8","unstructured":"Ghavamzadeh, M., Lazaric, A., Munos, R., & Hoffman, M. (2011). Finite-sample analysis of Lasso-TD. In Proceedings of the 28th international conference on machine learning (pp. 1177\u20131184)."},{"key":"5518_CR9","first-page":"1009","volume":"23","author":"J Johns","year":"2010","unstructured":"Johns, J., Painter-Wakefield, C., & Parr, R. (2010). Linear complementarity for regularized policy evaluation and improvement. Advances in Neural Information Processing Systems, 23, 1009\u20131017.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"5518_CR10","unstructured":"Jonschkowski, R., & Brock, O. (2013). Learning task-specific state representations by maximizing slowness and predictability. In 6th international workshop on Evolutionary and Reinforcement Learning for Autonomous Robot Systems (ERLARS)."},{"key":"5518_CR11","doi-asserted-by":"crossref","unstructured":"Kolter, J., & Ng, A. (2009). Regularization and feature selection in least-squares temporal difference learning. In Proceedings of the 26th annual international conference on machine learning (pp. 521\u2013528).","DOI":"10.1145\/1553374.1553442"},{"key":"5518_CR12","first-page":"1015","volume":"22","author":"G Konidaris","year":"2009","unstructured":"Konidaris, G., & Barto, A. (2009). Skill discovery in continuous reinforcement learning domains using skill chaining. Advances in Neural Information Processing Systems, 22, 1015\u20131023.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"5518_CR13","doi-asserted-by":"crossref","unstructured":"Konidaris, G., Osentoski, S., & Thomas, P. (2011). Value function approximation in reinforcement learning using the Fourier basis. In Proceedings of the twenty-fifth conference on artificial intelligence (pp. 380\u2013385).","DOI":"10.1609\/aaai.v25i1.7903"},{"key":"5518_CR14","first-page":"1107","volume":"4","author":"M Lagoudakis","year":"2003","unstructured":"Lagoudakis, M., & Parr, R. (2003). Least-squares policy iteration. Journal of Machine Learning Research, 4, 1107\u20131149.","journal-title":"Journal of Machine Learning Research"},{"key":"5518_CR15","first-page":"1540","volume":"23","author":"S Mahadevan","year":"2010","unstructured":"Mahadevan, S., & Liu, B. (2010). Basis construction from power series expansions of value functions. Advances in Neural Information Processing Systems, 23, 1540\u20131548.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"5518_CR16","unstructured":"Mahadevan, S., & Maggioni, M. (2007). Proto-value functions: A laplacian framework for learning representation and control in markov decision processes. Journal of Machine Learning Research, 8, 2169\u20132231."},{"issue":"12","key":"5518_CR17","doi-asserted-by":"crossref","first-page":"3397","DOI":"10.1109\/78.258082","volume":"41","author":"S Mallat","year":"1993","unstructured":"Mallat, S., & Zhang, Z. (1993). Matching pursuits with time\u2013frequency dictionaries. IEEE Transactions on Signal Processing, 41(12), 3397\u20133415.","journal-title":"IEEE Transactions on Signal Processing"},{"issue":"3","key":"5518_CR18","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1007\/s10208-008-9031-3","volume":"9","author":"D Needell","year":"2009","unstructured":"Needell, D., & Vershynin, R. (2009). Uniform uncertainty principle and signal recovery via regularized orthogonal matching pursuit. Foundations of Computational Mathematics, 9(3), 317\u2013334.","journal-title":"Foundations of Computational Mathematics"},{"key":"5518_CR19","unstructured":"Painter-Wakefield, C., & Parr, R. (2012). Greedy algorithms for sparse reinforcement learning. In Proceedings of the 29th international conference on machine learning (pp. 1391\u20131398)."},{"key":"5518_CR20","doi-asserted-by":"crossref","unstructured":"Parr, R., Painter-Wakefield, C., Li, L., & Littman, M. (2007). Analyzing feature generation for value-function approximation. In Proceedings of the 24th international conference on Machine learning (pp. 737\u2013744).","DOI":"10.1145\/1273496.1273589"},{"key":"5518_CR21","doi-asserted-by":"crossref","unstructured":"Parr, R., Li, L., Taylor, G., Painter-Wakefield, C., & Littman, M. (2008). An analysis of linear models, linear value-function approximation, and feature selection for reinforcement learning. In Proceedings of the 25th international conference on machine learning (pp. 752\u2013759).","DOI":"10.1145\/1390156.1390251"},{"key":"5518_CR22","unstructured":"Petrik, M., Taylor, G., Parr, R., & Zilberstein, S. (2010). Feature selection using regularization in approximate linear programs for Markov decision processes. In Proceedings of the 27th international conference on machine learning (pp. 871\u2013878)."},{"key":"5518_CR23","unstructured":"Sprague, N. (2009). Predictive projections. In Proceedings of the 21st international joint conference on Artifical intelligence (pp. 1223\u20131229)."},{"key":"5518_CR24","unstructured":"Sun, Y., Gomez, F.J., Ring, M.B., & Schmidhuber, J. (2011). Incremental basis construction from temporal difference error. In Proceedings of the 28th international conference on machine learning (pp. 481\u2013488)."},{"key":"5518_CR25","volume-title":"Introduction to reinforcement learning","author":"R Sutton","year":"1998","unstructured":"Sutton, R., & Barto, A. (1998). Introduction to reinforcement learning. Cambridge: MIT Press."},{"issue":"1","key":"5518_CR26","first-page":"9","volume":"3","author":"RS Sutton","year":"1988","unstructured":"Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Machine Learning, 3(1), 9\u201344.","journal-title":"Machine Learning"},{"key":"5518_CR27","unstructured":"Taylor, M., Kuhlmann, G., & Stone, P. (2008). Autonomous transfer for reinforcement learning. In The seventh international joint conference on autonomous agents and multiagent systems (pp. 283\u2013290)."},{"key":"5518_CR28","first-page":"1035","volume":"5","author":"A Tikhonov","year":"1963","unstructured":"Tikhonov, A. (1963). Solution of incorrectly formulated problems and the regularization method. Soviet Mathematics Doklady, 5, 1035.","journal-title":"Soviet Mathematics Doklady"},{"key":"5518_CR29","unstructured":"Van Roy, B. (1998). Learning and value function approximation in complex decision processes. PhD thesis, Massachusetts Institute of Technology."},{"key":"5518_CR30","doi-asserted-by":"crossref","unstructured":"Wahba, G. (1990). Spline models for observational data (Vol. 59). Philadelphia: SIAM.","DOI":"10.1137\/1.9781611970128"},{"issue":"7","key":"5518_CR31","doi-asserted-by":"crossref","first-page":"1515","DOI":"10.1109\/TAC.2009.2022097","volume":"54","author":"H Yu","year":"2009","unstructured":"Yu, H., & Bertsekas, D. P. (2009). Convergence results for some temporal difference methods based on least squares. Automatic Control, IEEE Transactions on, 54(7), 1515\u20131531.","journal-title":"Automatic Control, IEEE Transactions on"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-015-5518-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-015-5518-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-015-5518-8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,12]],"date-time":"2023-08-12T05:52:17Z","timestamp":1691819537000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-015-5518-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,7,14]]},"references-count":31,"journal-issue":{"issue":"2-3","published-print":{"date-parts":[[2015,9]]}},"alternative-id":["5518"],"URL":"https:\/\/doi.org\/10.1007\/s10994-015-5518-8","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,7,14]]}}}