{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T07:31:47Z","timestamp":1725521507582},"publisher-location":"Berlin, Heidelberg","reference-count":23,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540897217"},{"type":"electronic","value":"9783540897224"}],"license":[{"start":{"date-parts":[[2008,1,1]],"date-time":"2008-01-01T00:00:00Z","timestamp":1199145600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2008]]},"DOI":"10.1007\/978-3-540-89722-4_5","type":"book-chapter","created":{"date-parts":[[2008,11,26]],"date-time":"2008-11-26T08:57:14Z","timestamp":1227689834000},"page":"55-68","source":"Crossref","is-referenced-by-count":8,"title":["Regularized Fitted Q-Iteration: Application to Planning"],"prefix":"10.1007","author":[{"given":"Amir massoud","family":"Farahmand","sequence":"first","affiliation":[]},{"given":"Mohammad","family":"Ghavamzadeh","sequence":"additional","affiliation":[]},{"given":"Csaba","family":"Szepesv\u00e1ri","sequence":"additional","affiliation":[]},{"given":"Shie","family":"Mannor","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"5_CR1","doi-asserted-by":"crossref","unstructured":"Antos, A., Szepesv\u00e1ri, C., Munos, R.: Value-iteration based fitted policy iteration: learning with a single trajectory. In: IEEE ADPRL, pp. 330\u2013337 (2007)","DOI":"10.1109\/ADPRL.2007.368207"},{"key":"5_CR2","unstructured":"Antos, A., Munos, R., Szepesv\u00e1ri, C.: Fitted Q-iteration in continuous action-space MDPs. In: Advances in Neural Information Processing Systems 20, NIPS 2007 (in print, 2008)"},{"key":"5_CR3","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1007\/s10994-007-5038-2","volume":"71","author":"A. Antos","year":"2008","unstructured":"Antos, A., Szepesv\u00e1ri, C., Munos, R.: Learning near-optimal policies with Bellman-residual minimization based fitted policy iteration and a single sample path. Machine Learning\u00a071, 89\u2013129 (2008)","journal-title":"Machine Learning"},{"key":"5_CR4","volume-title":"Stochastic Optimal Control (The Discrete Time Case)","author":"D.P. Bertsekas","year":"1978","unstructured":"Bertsekas, D.P., Shreve, S.E.: Stochastic Optimal Control (The Discrete Time Case). Academic Press, New York (1978)"},{"key":"5_CR5","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1214\/07-EJS008","volume":"1","author":"F. Bunea","year":"2007","unstructured":"Bunea, F., Tsybakov, A., Wegkamp, M.: Sparsity oracle inequalities for the lasso. Electronic Journal of Statistics\u00a01, 169\u2013194 (2007)","journal-title":"Electronic Journal of Statistics"},{"issue":"8","key":"5_CR6","doi-asserted-by":"publisher","first-page":"2275","DOI":"10.1109\/TSP.2004.830985","volume":"52","author":"Y. Engel","year":"2004","unstructured":"Engel, Y., Mannor, S., Meir, R.: The kernel recursive least squares algorithm. IEEE Transaction on Signal Processing\u00a052(8), 2275\u20132285 (2004)","journal-title":"IEEE Transaction on Signal Processing"},{"key":"5_CR7","first-page":"201","volume-title":"ICML 2005: Proceedings of the 22nd international conference on Machine learning","author":"Y. Engel","year":"2005","unstructured":"Engel, Y., Mannor, S., Meir, R.: Reinforcement learning with Gaussian processes. 
In: ICML 2005: Proceedings of the 22nd international conference on Machine learning, pp. 201\u2013208. ACM, New York (2005)"},{"key":"5_CR8","first-page":"503","volume":"6","author":"D. Ernst","year":"2005","unstructured":"Ernst, D., Geurts, P., Wehenkel, L.: Tree-based batch mode reinforcement learning. Journal of Machine Learning Research\u00a06, 503\u2013556 (2005)","journal-title":"Journal of Machine Learning Research"},{"key":"5_CR9","unstructured":"Farahmand, A.M., Ghavamzadeh, M., Szepesv\u00e1ri, C., Mannor, S.: Regularized policy iteration. In: Advances in Neural Information Processing Systems 21, NIPS 2008 (to appear, 2008)"},{"key":"5_CR10","doi-asserted-by":"publisher","DOI":"10.1007\/b97848","volume-title":"A distribution-free theory of nonparametric regression","author":"L. Gy\u00f6rfi","year":"2002","unstructured":"Gy\u00f6rfi, L., Kohler, M., Krzy\u017cak, A., Walk, H.: A distribution-free theory of nonparametric regression. Springer, New York (2002)"},{"key":"5_CR11","unstructured":"Jung, T., Polani, D.: Least squares SVM for least squares TD learning. In: ECAI, pp. 499\u2013503 (2006)"},{"key":"5_CR12","unstructured":"Kearns, M., Mansour, Y., Ng, A.Y.: A sparse sampling algorithm for near-optimal planning in large Markovian decision processes. In: Proceedings of IJCAI 1999, pp. 1324\u20131331 (1999)"},{"key":"5_CR13","unstructured":"Lagoudakis, M.G., Parr, R.: Reinforcement learning as classification: Leveraging modern classifiers. In: ICML 2003, pp. 424\u2013431 (2003)"},{"key":"5_CR14","doi-asserted-by":"crossref","unstructured":"Loth, M., Davy, M., Preux, P.: Sparse temporal difference learning using LASSO. In: IEEE International Symposium on Approximate Dynamic Programming and Reinforcement Learning (2007)","DOI":"10.1109\/ADPRL.2007.368210"},{"key":"5_CR15","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1007\/s10479-005-5732-z","volume":"134","author":"S. Mannor","year":"2005","unstructured":"Mannor, S., Menache, I., Shimkin, N.: Basis function adaptation in temporal difference reinforcement learning. Annals of Operations Research\u00a0134, 215\u2013238 (2005)","journal-title":"Annals of Operations Research"},{"key":"5_CR16","first-page":"815","volume":"9","author":"R. Munos","year":"2008","unstructured":"Munos, R., Szepesv\u00e1ri, C.: Finite-time bounds for fitted value iteration. Journal of Machine Learning Research\u00a09, 815\u2013857 (2008)","journal-title":"Journal of Machine Learning Research"},{"key":"5_CR17","unstructured":"Ng, A.Y., Jordan, M.: PEGASUS: A policy search method for large MDPs and POMDPs. In: Proceedings of the 16th Conference in Uncertainty in Artificial Intelligence, pp. 406\u2013415 (2000)"},{"key":"5_CR18","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1023\/A:1017928328829","volume":"49","author":"D. Ormoneit","year":"2002","unstructured":"Ormoneit, D., Sen, S.: Kernel-based reinforcement learning. Machine Learning\u00a049, 161\u2013178 (2002)","journal-title":"Machine Learning"},{"key":"5_CR19","doi-asserted-by":"crossref","unstructured":"Parr, R., Painter-Wakefield, C., Li, L., Littman, M.L.: Analyzing feature generation for value-function approximation. In: ICML, pp. 737\u2013744 (2007)","DOI":"10.1145\/1273496.1273589"},{"key":"5_CR20","volume-title":"Learning with Kernels","author":"B. Sch\u00f6lkopf","year":"2002","unstructured":"Sch\u00f6lkopf, B., Smola, A.J.: Learning with Kernels. 
MIT Press, Cambridge (2002)"},{"key":"5_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1007\/11776420_15","volume-title":"Learning Theory","author":"N. Srebro","year":"2006","unstructured":"Srebro, N., Ben-David, S.: Learning bounds for support vector machines with learned kernels. In: Lugosi, G., Simon, H.U. (eds.) COLT 2006. LNCS, vol.\u00a04005, pp. 169\u2013183. Springer, Heidelberg (2006)"},{"key":"5_CR22","doi-asserted-by":"publisher","first-page":"973","DOI":"10.1109\/TNN.2007.899161","volume":"18","author":"X. Xu","year":"2007","unstructured":"Xu, X., Hu, D., Lu, X.: Kernel-based least squares policy iteration for reinforcement learning. IEEE Trans. on Neural Networks\u00a018, 973\u2013992 (2007)","journal-title":"IEEE Trans. on Neural Networks"},{"key":"5_CR23","doi-asserted-by":"publisher","first-page":"1743","DOI":"10.1109\/TIT.2003.813564","volume":"49","author":"D.-X. Zhou","year":"2003","unstructured":"Zhou, D.-X.: Capacity of reproducing kernel spaces in learning theory. IEEE Transactions on Information Theory\u00a049, 1743\u20131752 (2003)","journal-title":"IEEE Transactions on Information Theory"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-89722-4_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,15]],"date-time":"2019-05-15T14:59:00Z","timestamp":1557932340000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-89722-4_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008]]},"ISBN":["9783540897217","9783540897224"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-89722-4_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2008]]}}}
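
The record above is a work message from the public Crossref REST API (the payload sits under "message", with the bibliographic fields "title", "author", "container-title", "issued", "page", and "references-count" all visible in the JSON). As a minimal sketch of how such a record might be fetched and parsed, assuming network access and the standard api.crossref.org/works/{DOI} endpoint, with only the field names taken from the record itself:

```python
import json
import urllib.request

# DOI taken from the record above.
DOI = "10.1007/978-3-540-89722-4_5"

# Public Crossref REST API endpoint for a single work (assumed available).
url = f"https://api.crossref.org/works/{DOI}"

with urllib.request.urlopen(url) as resp:
    record = json.load(resp)

# The payload sits under "message"; the field names used below all
# appear verbatim in the record above.
work = record["message"]
title = work["title"][0]
authors = ", ".join(
    f"{a.get('given', '')} {a.get('family', '')}".strip()
    for a in work.get("author", [])
)
book = work["container-title"][-1]  # "Recent Advances in Reinforcement Learning"
year = work["issued"]["date-parts"][0][0]
pages = work.get("page", "")

print(f"{authors}: {title}. In: {book}, pp. {pages} ({year})")
print(f"Deposited references: {work['references-count']}")
```

For anything beyond a one-off lookup, Crossref asks clients to identify themselves (for example via a mailto parameter or User-Agent) to be routed to its polite pool; the sketch above omits that.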