乐胖代购免代理版

{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T04:53:42Z","timestamp":1725684822915},"publisher-location":"Berlin, Heidelberg","reference-count":16,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642299452"},{"type":"electronic","value":"9783642299469"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-29946-9_17","type":"book-chapter","created":{"date-parts":[[2012,5,18]],"date-time":"2012-05-18T17:01:49Z","timestamp":1337360509000},"page":"153-164","source":"Crossref","is-referenced-by-count":6,"title":["Unified Inter and Intra Options Learning Using Policy Gradient Methods"],"prefix":"10.1007","author":[{"given":"Kfir Y.","family":"Levy","sequence":"first","affiliation":[]},{"given":"Nahum","family":"Shimkin","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"17_CR1","unstructured":"Comanici, G., Precup, D.: Optimal policy switching algorithms for reinforcement learning. In: Proceedings of the 9th International Conference on Autonomous Agents and Multiagent Systems, pp. 709\u2013714 (2010)"},{"key":"17_CR2","unstructured":"Ghavamzadeh, M., Mahadevan, S.: Hierarchical policy gradient algorithms. In: Twentieth ICML, pp. 226\u2013233 (2003)"},{"key":"17_CR3","doi-asserted-by":"crossref","unstructured":"Neumann, G., Maass, W., Peters, J.: Learning complex motions by sequencing simpler motion templates. In: ICML (2009)","DOI":"10.1145\/1553374.1553471"},{"key":"17_CR4","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"R.S. Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.: Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning. Artificial intelligence\u00a0112, 181\u2013211 (1999)","journal-title":"Artificial intelligence"},{"key":"17_CR5","doi-asserted-by":"crossref","unstructured":"Simsek, O., Barto, A.: Using relative novelty to identify useful temporal abstractions in reinforcement learning. In: ICML, vol.\u00a021, p. 751. Citeseer (2004)","DOI":"10.1145\/1015330.1015353"},{"key":"17_CR6","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1007\/3-540-36755-1_25","volume-title":"Machine Learning: ECML 2002","author":"I. Menache","year":"2002","unstructured":"Menache, I., Mannor, S., Shimkin, N.: Q-Cut - Dynamic Discovery of Sub-goals in Reinforcement Learning. In: Elomaa, T., Mannila, H., Toivonen, H. (eds.) ECML 2002. LNCS (LNAI), vol.\u00a02430, pp. 295\u2013306. Springer, Heidelberg (2002)"},{"key":"17_CR7","unstructured":"Sutton, R.S., McAllester, D., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. In: Advances in Neural Information Processing Systems, vol. 12 (2000)"},{"issue":"7-9","key":"17_CR8","doi-asserted-by":"publisher","first-page":"1180","DOI":"10.1016\/j.neucom.2007.11.026","volume":"71","author":"J. Peters","year":"2008","unstructured":"Peters, J., Schaal, S.: Natural actor-critic. Neurocomputing\u00a071(7-9), 1180\u20131190 (2008)","journal-title":"Neurocomputing"},{"key":"17_CR9","doi-asserted-by":"publisher","first-page":"2471","DOI":"10.1016\/j.automatica.2009.07.008","volume":"45","author":"S. Bhatnagar","year":"2009","unstructured":"Bhatnagar, S., Sutton, R.S., Ghavamzadeh, M., Lee, M.: Natural actor-critic algorithms. Automatica\u00a045, 2471\u20132482 (2009)","journal-title":"Automatica"},{"key":"17_CR10","doi-asserted-by":"crossref","unstructured":"Richter, S., Aberdeen, D., Yu, J.: Natural actor-critic for road traffic optimisation. In: Advances in Neural Information Processing Systems, vol.\u00a019, p. 1169 (2007)","DOI":"10.7551\/mitpress\/7503.003.0151"},{"key":"17_CR11","doi-asserted-by":"crossref","unstructured":"Buffet, O., Dutech, A., Charpillet, F.: Shaping multi-agent systems with gradient reinforcement learning. In: Autonomous Agents and Multi-Agent Systems (2007)","DOI":"10.1007\/s10458-006-9010-5"},{"key":"17_CR12","unstructured":"Kakade, S.: A natural policy gradient. In: Advances in Neural Information Processing Systems 14, vol.\u00a02, pp. 1531\u20131538 (2002)"},{"key":"17_CR13","unstructured":"Bagnell, J., Schneider, J.: Covariant policy search. In: International Joint Conference on Artificial Intelligence, vol.\u00a018, pp. 1019\u20131024. Citeseer (2003)"},{"key":"17_CR14","doi-asserted-by":"publisher","first-page":"233","DOI":"10.1023\/A:1017936530646","volume":"49","author":"J.A. Boyan","year":"2002","unstructured":"Boyan, J.A.: Technical update: Least-squares temporal difference learning. Machine Learning\u00a049, 233\u2013246 (2002)","journal-title":"Machine Learning"},{"key":"17_CR15","doi-asserted-by":"crossref","unstructured":"Nedi\u0107, A., Bertsekas, D.: Least squares policy evaluation algorithms with linear function approximation. Discrete Event Dynamic Systems\u00a013 (2003)","DOI":"10.1023\/A:1022192903948"},{"key":"17_CR16","doi-asserted-by":"crossref","unstructured":"Yoshimoto, J., Nishimura, M., Tokita, Y., Ishii, S.: Acrobot control by learning the switching of multiple controllers. Artificial Life and Robotics\u00a09 (2005)","DOI":"10.1007\/s10015-004-0340-6"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-29946-9_17.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,24]],"date-time":"2024-04-24T17:29:39Z","timestamp":1713979779000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-29946-9_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642299452","9783642299469"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-29946-9_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}