乐胖代购免代理版

{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T04:53:53Z","timestamp":1725684833199},"publisher-location":"Berlin, Heidelberg","reference-count":21,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642299452"},{"type":"electronic","value":"9783642299469"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-29946-9_14","type":"book-chapter","created":{"date-parts":[[2012,5,18]],"date-time":"2012-05-18T17:01:49Z","timestamp":1337360509000},"page":"115-127","source":"Crossref","is-referenced-by-count":1,"title":["Recursive Least-Squares Learning with Eligibility Traces"],"prefix":"10.1007","author":[{"given":"Bruno","family":"Scherrer","sequence":"first","affiliation":[]},{"given":"Matthieu","family":"Geist","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"14_CR1","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"574","DOI":"10.1007\/11776420_42","volume-title":"Learning Theory","author":"A. Antos","year":"2006","unstructured":"Antos, A., Szepesv\u00e1ri, C., Munos, R.: Learning Near-Optimal Policies with Bellman-Residual Minimization Based Fitted Policy Iteration and a Single Sample Path. In: Lugosi, G., Simon, H.U. (eds.) COLT 2006. LNCS (LNAI), vol.\u00a04005, pp. 574\u2013588. Springer, Heidelberg (2006)"},{"key":"14_CR2","doi-asserted-by":"crossref","unstructured":"Baird, L.C.: Residual Algorithms: Reinforcement Learning with Function Approximation. In: ICML (1995)","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"issue":"1","key":"14_CR3","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1016\/j.cam.2008.07.037","volume":"227","author":"D.P. Bertsekas","year":"2009","unstructured":"Bertsekas, D.P., Yu, H.: Projected Equation Methods for Approximate Solution of Large Linear Systems. J. Comp. and Applied Mathematics\u00a0227(1), 27\u201350 (2009)","journal-title":"J. Comp. and Applied Mathematics"},{"key":"14_CR4","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. Athena Scientific (1996)"},{"issue":"2-3","key":"14_CR5","first-page":"233","volume":"49","author":"J.A. Boyan","year":"1999","unstructured":"Boyan, J.A.: Technical Update: Least-Squares Temporal Difference Learning. Machine Learning\u00a049(2-3), 233\u2013246 (1999)","journal-title":"Machine Learning"},{"issue":"1-3","key":"14_CR6","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1007\/BF00114723","volume":"22","author":"S.J. Bradtke","year":"1996","unstructured":"Bradtke, S.J., Barto, A.G.: Linear Least-Squares algorithms for temporal difference learning. Machine Learning\u00a022(1-3), 33\u201357 (1996)","journal-title":"Machine Learning"},{"key":"14_CR7","first-page":"207","volume":"16","author":"D. Choi","year":"2006","unstructured":"Choi, D., Van Roy, B.: A Generalized Kalman Filter for Fixed Point Approximation and Efficient Temporal-Difference Learning. DEDS\u00a016, 207\u2013239 (2006)","journal-title":"DEDS"},{"key":"14_CR8","unstructured":"Engel, Y.: Algorithms and Representations for Reinforcement Learning. Ph.D. thesis, Hebrew University (2005)"},{"key":"14_CR9","doi-asserted-by":"crossref","unstructured":"Geist, M., Pietquin, O.: Eligibility Traces through Colored Noises. In: ICUMT (2010)","DOI":"10.1109\/ICUMT.2010.5676597"},{"key":"14_CR10","doi-asserted-by":"crossref","first-page":"483","DOI":"10.1613\/jair.3077","volume":"39","author":"M. Geist","year":"2010","unstructured":"Geist, M., Pietquin, O.: Kalman Temporal Differences. JAIR\u00a039, 483\u2013532 (2010)","journal-title":"JAIR"},{"key":"14_CR11","doi-asserted-by":"crossref","unstructured":"Geist, M., Pietquin, O.: Parametric Value Function Approximation: a Unified View. In: ADPRL (2011)","DOI":"10.1109\/ADPRL.2011.5967355"},{"key":"14_CR12","unstructured":"Kearns, M., Singh, S.: Bias-Variance Error Bounds for Temporal Difference Updates. In: COLT (2000)"},{"key":"14_CR13","doi-asserted-by":"crossref","unstructured":"Maei, H.R., Sutton, R.S.: GQ(\u03bb): A general gradient algorithm for temporal-difference prediction learning with eligibility traces. In: Conference on Artificial General Intelligence (2010)","DOI":"10.2991\/agi.2010.22"},{"key":"14_CR14","unstructured":"Munos, R.: Error Bounds for Approximate Policy Iteration. In: ICML (2003)"},{"key":"14_CR15","first-page":"79","volume":"13","author":"A. Nedi\u0107","year":"2003","unstructured":"Nedi\u0107, A., Bertsekas, D.P.: Least Squares Policy Evaluation Algorithms with Linear Function Approximation. DEDS\u00a013, 79\u2013110 (2003)","journal-title":"DEDS"},{"key":"14_CR16","unstructured":"Precup, D., Sutton, R.S., Singh, S.P.: Eligibility Traces for Off-Policy Policy Evaluation. In: ICML (2000)"},{"key":"14_CR17","doi-asserted-by":"crossref","unstructured":"Ripley, B.D.: Stochastic Simulation. Wiley & Sons (1987)","DOI":"10.1002\/9780470316726"},{"key":"14_CR18","unstructured":"Scherrer, B.: Should one compute the Temporal Difference fix point or minimize the Bellman Residual? The unified oblique projection view. In: ICML (2010)"},{"key":"14_CR19","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction (Adaptive Computation and Machine Learning), 3rd edn. MIT Press (1998)"},{"issue":"5","key":"14_CR20","doi-asserted-by":"publisher","first-page":"674","DOI":"10.1109\/9.580874","volume":"42","author":"J. Tsitsiklis","year":"1997","unstructured":"Tsitsiklis, J., Van Roy, B.: An analysis of temporal-difference learning with function approximation. IEEE Transactions on Automatic Control\u00a042(5), 674\u2013690 (1997)","journal-title":"IEEE Transactions on Automatic Control"},{"key":"14_CR21","unstructured":"Yu, H.: Convergence of Least-Squares Temporal Difference Methods under General Conditions. In: ICML (2010)"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-29946-9_14.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,5,4]],"date-time":"2021-05-04T11:22:23Z","timestamp":1620127343000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-29946-9_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642299452","9783642299469"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-29946-9_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}