{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T14:00:01Z","timestamp":1725804001087},"publisher-location":"Berlin, Heidelberg","reference-count":25,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783662448502"},{"type":"electronic","value":"9783662448519"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-662-44851-9_35","type":"book-chapter","created":{"date-parts":[[2014,9,1]],"date-time":"2014-09-01T09:00:58Z","timestamp":1409562058000},"page":"549-564","source":"Crossref","is-referenced-by-count":18,"title":["Boosted Bellman Residual Minimization Handling Expert Demonstrations"],"prefix":"10.1007","author":[{"given":"Bilal","family":"Piot","sequence":"first","affiliation":[]},{"given":"Matthieu","family":"Geist","sequence":"additional","affiliation":[]},{"given":"Olivier","family":"Pietquin","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"35_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Ng, A.: Apprenticeship learning via inverse reinforcement learning. In: Proc. of ICML (2004)","DOI":"10.1145\/1015330.1015430"},{"key":"35_CR2","doi-asserted-by":"crossref","unstructured":"Antos, A., Szepesv\u00e1ri, C., Munos, R.: Learning near-optimal policies with bellman-residual minimization based fitted policy iteration and a single sample path. Machine Learning (2008)","DOI":"10.1007\/s10994-007-5038-2"},{"key":"35_CR3","doi-asserted-by":"crossref","unstructured":"Archibald, T., McKinnon, K., Thomas, L.: On the generation of markov decision processes. Journal of the Operational Research Society (1995)","DOI":"10.2307\/2584329"},{"key":"35_CR4","doi-asserted-by":"crossref","unstructured":"Aronszajn, N.: Theory of reproducing kernels. Transactions of the American Mathematical Society (1950)","DOI":"10.21236\/ADA296533"},{"key":"35_CR5","volume-title":"Dynamic programming and optimal control","author":"D. Bertsekas","year":"1995","unstructured":"Bertsekas, D.: Dynamic programming and optimal control, vol.\u00a01. Athena Scientific, Belmont (1995)"},{"key":"35_CR6","doi-asserted-by":"crossref","unstructured":"Bradtke, S., Barto, A.: Linear least-squares algorithms for temporal difference learning. Machine Learning (1996)","DOI":"10.1007\/BF00114723"},{"key":"35_CR7","unstructured":"Breiman, L.: Classification and regression trees. CRC Press (1993)"},{"key":"35_CR8","doi-asserted-by":"crossref","unstructured":"Clarke, F.: Generalized gradients and applications. Transactions of the American Mathematical Society (1975)","DOI":"10.2307\/1997202"},{"key":"35_CR9","unstructured":"Farahmand, A., Munos, R., Szepesv\u00e1ri, C.: Error propagation for approximate policy and value iteration. In: Proc. of NIPS (2010)"},{"key":"35_CR10","unstructured":"Grubb, A., Bagnell, J.: Generalized boosting algorithms for convex optimization. In: Proc. of ICML (2011)"},{"key":"35_CR11","unstructured":"Judah, K., Fern, A., Dietterich, T.: Active imitation learning via reduction to iid active learning. In: Proc. of UAI (2012)"},{"key":"35_CR12","unstructured":"Kim, B., Farahmand, A., Pineau, J., Precup, D.: Learning from limited demonstrations. In: Proc. of NIPS (2013)"},{"key":"35_CR13","unstructured":"Klein, E., Geist, M., Piot, B., Pietquin, O.: Inverse reinforcement learning through structured classification. In: Proc. of NIPS (2012)"},{"key":"35_CR14","unstructured":"Lagoudakis, M., Parr, R.: Least-squares policy iteration. Journal of Machine Learning Research (2003)"},{"key":"35_CR15","unstructured":"Lever, G., Baldassarre, L., Gretton, A., Pontil, M., Gr\u00fcnew\u00e4lder, S.: Modelling transition dynamics in mdps with rkhs embeddings. In: Proc. of ICML (2012)"},{"key":"35_CR16","doi-asserted-by":"crossref","unstructured":"Munos, R.: Performance bounds in l_p-norm for approximate value iteration. SIAM Journal on Control and Optimization (2007)","DOI":"10.1137\/040614384"},{"key":"35_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1007\/978-3-642-40988-2_2","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"B. Piot","year":"2013","unstructured":"Piot, B., Geist, M., Pietquin, O.: Learning from demonstrations: Is it worth estimating a reward function? In: Blockeel, H., Kersting, K., Nijssen, S., \u017delezn\u00fd, F. (eds.) ECML PKDD 2013, Part I. LNCS, vol.\u00a08188, pp. 17\u201332. Springer, Heidelberg (2013)"},{"key":"35_CR18","doi-asserted-by":"crossref","unstructured":"Puterman, M.: Markov decision processes: Discrete stochastic dynamic programming. John Wiley & Sons (1994)","DOI":"10.1002\/9780470316887"},{"key":"35_CR19","doi-asserted-by":"crossref","unstructured":"Ratliff, N., Bagnell, J., Srinivasa, S.: Imitation learning for locomotion and manipulation. In: Proc. of IEEE-RAS International Conference on Humanoid Robots (2007)","DOI":"10.21236\/ADA528601"},{"key":"35_CR20","doi-asserted-by":"crossref","unstructured":"Ratliff, N., Bagnell, J., Zinkevich, M.: Maximum margin planning. In: Proc. of ICML (2006)","DOI":"10.1145\/1143844.1143936"},{"key":"35_CR21","unstructured":"Ross, S., Gordon, G., Bagnell, J.: A reduction of imitation learning and structured prediction to no-regret online learning. In: Proc. of AISTATS (2011)"},{"key":"35_CR22","doi-asserted-by":"crossref","unstructured":"Shor, N., Kiwiel, K., Ruszcaynski, A.: Minimization methods for non-differentiable functions. Springer (1985)","DOI":"10.1007\/978-3-642-82118-9"},{"key":"35_CR23","unstructured":"Sriperumbudur, B., Gretton, A., Fukumizu, K., Sch\u00f6lkopf, B., Lanckriet, G.: Hilbert space embeddings and metrics on probability measures. The Journal of Machine Learning Research (2010)"},{"key":"35_CR24","doi-asserted-by":"crossref","unstructured":"Syed, U., Bowling, M., Schapire, R.: Apprenticeship learning using linear programming. In: Proc. of ICML (2008)","DOI":"10.1145\/1390156.1390286"},{"key":"35_CR25","doi-asserted-by":"crossref","unstructured":"Yu, B.: Rates of convergence for empirical processes of stationary mixing sequences. The Annals of Probability (1994)","DOI":"10.1214\/aop\/1176988849"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-662-44851-9_35","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,14]],"date-time":"2019-09-14T20:09:54Z","timestamp":1568491794000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-662-44851-9_35"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783662448502","9783662448519"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-662-44851-9_35","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2014]]}}}