乐胖代购免代理版

{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T04:53:49Z","timestamp":1725684829458},"publisher-location":"Berlin, Heidelberg","reference-count":17,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642299452"},{"type":"electronic","value":"9783642299469"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-29946-9_28","type":"book-chapter","created":{"date-parts":[[2012,5,18]],"date-time":"2012-05-18T17:01:49Z","timestamp":1337360509000},"page":"285-296","source":"Crossref","is-referenced-by-count":4,"title":["Batch, Off-Policy and Model-Free Apprenticeship Learning"],"prefix":"10.1007","author":[{"given":"Edouard","family":"Klein","sequence":"first","affiliation":[]},{"given":"Matthieu","family":"Geist","sequence":"additional","affiliation":[]},{"given":"Olivier","family":"Pietquin","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"28_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Ng, A.: Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the Twenty-First International Conference on Machine Learning, p. 1. ACM (2004)","DOI":"10.1145\/1015330.1015430"},{"issue":"1","key":"28_CR2","first-page":"33","volume":"22","author":"S. Bradtke","year":"1996","unstructured":"Bradtke, S., Barto, A.: Linear least-squares algorithms for temporal difference learning. Machine Learning\u00a022(1), 33\u201357 (1996)","journal-title":"Machine Learning"},{"key":"28_CR3","unstructured":"Kolter, J., Abbeel, P., Ng, A.: Hierarchical apprenticeship learning with application to quadruped locomotion. In: Neural Information Processing Systems, vol.\u00a020 (2008)"},{"key":"28_CR4","first-page":"1107","volume":"4","author":"M. Lagoudakis","year":"2003","unstructured":"Lagoudakis, M., Parr, R.: Least-squares policy iteration. The Journal of Machine Learning Research\u00a04, 1107\u20131149 (2003)","journal-title":"The Journal of Machine Learning Research"},{"key":"28_CR5","unstructured":"Lazaric, A., Ghavamzadeh, M., Munos, R.: Finite-sample analysis of lstd. In: Proceedings of the 27th International Conference on Machine Learning (2010)"},{"issue":"1","key":"28_CR6","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1023\/A:1022192903948","volume":"13","author":"A. Nedi\u0107","year":"2003","unstructured":"Nedi\u0107, A., Bertsekas, D.: Least squares policy evaluation algorithms with linear function approximation. Discrete Event Dynamic Systems\u00a013(1), 79\u2013110 (2003)","journal-title":"Discrete Event Dynamic Systems"},{"key":"28_CR7","unstructured":"Neu, G., Szepesv\u00e1ri, C.: Apprenticeship learning using inverse reinforcement learning and gradient methods. In: Proc. UAI, pp. 295\u2013302 (2007)"},{"key":"28_CR8","unstructured":"Ng, A., Russell, S.: Algorithms for inverse reinforcement learning. In: Proceedings of the Seventeenth International Conference on Machine Learning, pp. 663\u2013670. Morgan Kaufmann Publishers Inc. (2000)"},{"key":"28_CR9","unstructured":"Ramachandran, D., Amir, E.: Bayesian inverse reinforcement learning. In: Proceedings of the International Joint Conference on Artificial Intelligence, pp. 2586\u20132591 (2007)"},{"key":"28_CR10","doi-asserted-by":"crossref","unstructured":"Ratliff, N., Bagnell, J., Srinivasa, S.: Imitation learning for locomotion and manipulation. In: 2007 7th IEEE-RAS International Conference on Humanoid Robots, pp. 392\u2013397. IEEE (2007)","DOI":"10.1109\/ICHR.2007.4813899"},{"key":"28_CR11","doi-asserted-by":"crossref","unstructured":"Ratliff, N., Bradley, D., Bagnell, J., Chestnutt, J.: Boosting structured prediction for imitation learning. In: Advances in Neural Information Processing Systems, vol.\u00a019, p. 1153 (2007)","DOI":"10.7551\/mitpress\/7503.003.0149"},{"key":"28_CR12","doi-asserted-by":"crossref","unstructured":"Ratliff, N., Bagnell, J., Zinkevich, M.: Maximum margin planning. In: Proceedings of the 23rd International Conference on Machine Learning, p. 736. ACM (2006)","DOI":"10.1145\/1143844.1143936"},{"key":"28_CR13","doi-asserted-by":"crossref","unstructured":"Russell, S.: Learning agents for uncertain environments (extended abstract). In: Proceedings of the Eleventh Annual Conference on Computational Learning Theory, p. 103. ACM (1998)","DOI":"10.1145\/279943.279964"},{"key":"28_CR14","unstructured":"Sutton, R., Barto, A.: Reinforcement learning. MIT Press (1998)"},{"key":"28_CR15","doi-asserted-by":"crossref","unstructured":"Syed, U., Bowling, M., Schapire, R.: Apprenticeship learning using linear programming. In: Proceedings of the 25th International Conference on Machine Learning, pp. 1032\u20131039. ACM (2008)","DOI":"10.1145\/1390156.1390286"},{"key":"28_CR16","unstructured":"Syed, U., Schapire, R.: A game-theoretic approach to apprenticeship learning. In: Advances in Neural Information Processing Systems, vol.\u00a020, pp. 1449\u20131456 (2008)"},{"key":"28_CR17","unstructured":"Ziebart, B., Maas, A., Bagnell, J., Dey, A.: Maximum entropy inverse reinforcement learning. In: Proc. AAAI, pp. 1433\u20131438 (2008)"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-29946-9_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,24]],"date-time":"2024-04-24T17:30:11Z","timestamp":1713979811000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-29946-9_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642299452","9783642299469"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-29946-9_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}