乐胖代购免代理版

{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T07:31:49Z","timestamp":1725521509735},"publisher-location":"Berlin, Heidelberg","reference-count":21,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540897217"},{"type":"electronic","value":"9783540897224"}],"license":[{"start":{"date-parts":[[2008,1,1]],"date-time":"2008-01-01T00:00:00Z","timestamp":1199145600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2008]]},"DOI":"10.1007\/978-3-540-89722-4_9","type":"book-chapter","created":{"date-parts":[[2008,11,26]],"date-time":"2008-11-26T13:57:14Z","timestamp":1227707834000},"page":"110-123","source":"Crossref","is-referenced-by-count":3,"title":["Basis Expansion in Natural Actor Critic Methods"],"prefix":"10.1007","author":[{"given":"Sertan","family":"Girgin","sequence":"first","affiliation":[]},{"given":"Philippe","family":"Preux","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"9_CR1","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998); A Bradford Book"},{"key":"9_CR2","volume-title":"Dynamic programming and Markov processes","author":"R. Howard","year":"1960","unstructured":"Howard, R.: Dynamic programming and Markov processes. MIT Press, Cambridge (1960)"},{"key":"9_CR3","series-title":"Probability and mathematical statistics","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes \u2014 Discrete Stochastic Dynamic Programming","author":"M. Puterman","year":"1994","unstructured":"Puterman, M.: Markov Decision Processes \u2014 Discrete Stochastic Dynamic Programming. Probability and mathematical statistics. Wiley, Chichester (1994)"},{"key":"9_CR4","unstructured":"Sutton, R.S., McAllester, D., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. In: Neural Information Processing Systems (NIPS), pp. 1057\u20131063 (1999)"},{"key":"9_CR5","first-page":"229","volume":"8","author":"R.J. Williams","year":"1992","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine Learning\u00a08, 229\u2013256 (1992)","journal-title":"Machine Learning"},{"issue":"4","key":"9_CR6","doi-asserted-by":"publisher","first-page":"1143","DOI":"10.1137\/S0363012901385691","volume":"42","author":"V.R. Konda","year":"2003","unstructured":"Konda, V.R., Tsitsiklis, J.N.: On actor-critic algorithms. SIAM J. Control Optim.\u00a042(4), 1143\u20131166 (2003)","journal-title":"SIAM J. Control Optim."},{"key":"9_CR7","doi-asserted-by":"crossref","unstructured":"Peters, J., Schaal, S.: Policy gradient methods for robotics. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems, October 2006, pp. 2219\u20132225 (2006)","DOI":"10.1109\/IROS.2006.282564"},{"issue":"2","key":"9_CR8","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1162\/089976698300017746","volume":"10","author":"S.-i. Amari","year":"1998","unstructured":"Amari, S.i.: Natural gradient works efficiently in learning. Neural Computation\u00a010(2), 251\u2013276 (1998)","journal-title":"Neural Computation"},{"issue":"7-9","key":"9_CR9","doi-asserted-by":"publisher","first-page":"1180","DOI":"10.1016\/j.neucom.2007.11.026","volume":"71","author":"J. Peters","year":"2008","unstructured":"Peters, J., Schaal, S.: Natural actor-critic. Neurocomput.\u00a071(7-9), 1180\u20131190 (2008)","journal-title":"Neurocomput."},{"key":"9_CR10","first-page":"105","volume-title":"Advances in Neural Information Processing Systems","author":"S. Bhatnagar","year":"2008","unstructured":"Bhatnagar, S., Sutton, R., Ghavamzadeh, M., Lee, M.: Incremental natural actor-critic algorithms. In: Platt, J.C., Koller, D., Singer, Y., Roweis, S. (eds.) Advances in Neural Information Processing Systems, vol.\u00a020, pp. 105\u2013112. MIT Press, Cambridge (2008)"},{"key":"9_CR11","doi-asserted-by":"crossref","unstructured":"Riedmiller, M., Peters, J., Schaal, S.: Evaluation of policy gradient methods and variants on the cart-pole benchmark. In: IEEE International Symposium on Approximate Dynamic Programming and Reinforcement Learning, 2007. ADPRL 2007, pp. 254\u2013261 (2007)","DOI":"10.1109\/ADPRL.2007.368196"},{"key":"9_CR12","series-title":"Denver 1989","first-page":"524","volume-title":"Advances in Neural Information Processing Systems","author":"S.E. Fahlman","year":"1990","unstructured":"Fahlman, S.E., Lebiere, C.: The cascade-correlation learning architecture. In: Touretzky, D.S. (ed.) Advances in Neural Information Processing Systems. Denver 1989, vol.\u00a02, pp. 524\u2013532. Morgan Kaufmann, San Mateo (1990)"},{"key":"9_CR13","unstructured":"Riedmiller, M., Braun, H.: A direct adaptive method for faster backpropagation learning: the rprop algorithm, vol.\u00a01, pp. 586\u2013591 (1993)"},{"key":"9_CR14","first-page":"1157","volume":"3","author":"I. Guyon","year":"2003","unstructured":"Guyon, I., Elisseff, A.: An introduction to variable and feature selection. Journal of Machine Learning Research\u00a03, 1157\u20131182 (2003)","journal-title":"Journal of Machine Learning Research"},{"key":"9_CR15","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1007\/s10479-005-5732-z","volume":"134","author":"I. Menache","year":"2005","unstructured":"Menache, I., Mannor, S., Shimkin, N.: Basis function adaptation in temporal difference reinforcement learning. Annals of Operations Research\u00a0134, 215\u2013238 (2005)","journal-title":"Annals of Operations Research"},{"key":"9_CR16","doi-asserted-by":"publisher","first-page":"449","DOI":"10.1145\/1143844.1143901","volume-title":"ICML","author":"P.W. Keller","year":"2006","unstructured":"Keller, P.W., Mannor, S., Precup, D.: Automatic basis function construction for approximate dynamic programming and reinforcement learning. In: ICML, pp. 449\u2013456. ACM, New York (2006)"},{"key":"9_CR17","doi-asserted-by":"publisher","first-page":"737","DOI":"10.1145\/1273496.1273589","volume-title":"ICML","author":"R. Parr","year":"2007","unstructured":"Parr, R., Painter-Wakefield, C., Li, L., Littman, M.: Analyzing feature generation for value-function approximation. In: ICML, pp. 737\u2013744. ACM, New York (2007)"},{"key":"9_CR18","unstructured":"Mahadevan, S.: Representation policy iteration. In: UAI, pp. 372\u2013379 (2005)"},{"key":"9_CR19","doi-asserted-by":"publisher","first-page":"385","DOI":"10.1145\/1273496.1273545","volume-title":"ICML","author":"J. Johns","year":"2007","unstructured":"Johns, J., Mahadevan, S.: Constructing basis functions from directed graphs for value function approximation. In: ICML, pp. 385\u2013392. ACM, New York (2007)"},{"key":"9_CR20","first-page":"2169","volume":"8","author":"S. Mahadevan","year":"2007","unstructured":"Mahadevan, S., Maggioni, M.: Proto-value functions: A laplacian framework for learning representation and control in markov decision processes. Journal of Machine Learning Research\u00a08, 2169\u20132231 (2007)","journal-title":"Journal of Machine Learning Research"},{"key":"9_CR21","first-page":"632","volume-title":"ICML","author":"F. Rivest","year":"2003","unstructured":"Rivest, F., Precup, D.: Combining TD-learning with cascade-correlation networks. In: Fawcett, T., Mishra, N. (eds.) ICML, pp. 632\u2013639. AAAI Press, Menlo Park (2003)"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-89722-4_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,9,23]],"date-time":"2021-09-23T07:16:56Z","timestamp":1632381416000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-89722-4_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008]]},"ISBN":["9783540897217","9783540897224"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-89722-4_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2008]]}}}