{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,12]],"date-time":"2024-09-12T04:31:00Z","timestamp":1726115460358},"publisher-location":"Cham","reference-count":56,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030667221"},{"type":"electronic","value":"9783030667238"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-66723-8_19","type":"book-chapter","created":{"date-parts":[[2021,2,9]],"date-time":"2021-02-09T13:36:54Z","timestamp":1612877814000},"page":"313-329","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":17,"title":["Imitation Learning as f-Divergence Minimization"],"prefix":"10.1007","author":[{"given":"Liyiming","family":"Ke","sequence":"first","affiliation":[]},{"given":"Sanjiban","family":"Choudhury","sequence":"additional","affiliation":[]},{"given":"Matt","family":"Barnes","sequence":"additional","affiliation":[]},{"given":"Wen","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Gilwoo","family":"Lee","sequence":"additional","affiliation":[]},{"given":"Siddhartha","family":"Srinivasa","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,2,9]]},"reference":[{"key":"19_CR1","doi-asserted-by":"crossref","unstructured":"Ross, S., Melik-Barkhudarov, N., Shankar, K.S., Wendel, A., Dey, D., Bagnell, J.A., Hebert, M.: Learning monocular reactive UAV control in cluttered natural environments. In: 2013 IEEE International Conference on Robotics and Automation (ICRA) (2013)","DOI":"10.1109\/ICRA.2013.6630809"},{"key":"19_CR2","unstructured":"Finn, C., Levine, S., Abbeel, P.: Guided cost learning: deep inverse optimal control via policy optimization. In: International Conference on Machine Learning, pp. 49\u201358 (2016)"},{"key":"19_CR3","unstructured":"Pomerleau, D.A.: ALVINN: an autonomous land vehicle in a neural network. In: Touretzky, D.S. (ed.) Advances in Neural Information Processing Systems, vol. 1, pp. 305\u2013313. Morgan-Kaufmann (1989)"},{"key":"19_CR4","unstructured":"Li, Y., Song, J., Ermon, S.: Infogail: interpretable imitation learning from visual demonstrations. In: Advances in Neural Information Processing Systems, pp. 3812\u20133822 (2017)"},{"key":"19_CR5","unstructured":"Ho, J., Ermon, S.: Generative adversarial imitation learning. In: Advances in Neural Information Processing Systems, pp. 4565\u20134573 (2016)"},{"key":"19_CR6","unstructured":"Nowozin, S., Cseke, B., Tomioka, R.: f-gan: training generative neural samplers using variational divergence minimization. In: Advances in Neural Information Processing Systems, pp. 271\u2013279 (2016)"},{"key":"19_CR7","unstructured":"Ke, L., Choudhury, S., Barnes, M., Sun, W., Lee, G., Srinivasa, S.: Imitation learning as $$f$$-divergence minimization. arXiv preprint arXiv:1905.12888v2 (2019)"},{"key":"19_CR8","doi-asserted-by":"crossref","unstructured":"Osa, T., Pajarinen, J., Neumann, G., Bagnell, J.A., Abbeel, P., Peters, J.: An algorithmic perspective on imitation learning. arXiv preprint arXiv:1811.06711 (2018)","DOI":"10.1561\/9781680834116"},{"issue":"5","key":"19_CR9","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1016\/j.robot.2008.10.024","volume":"57","author":"BD Argall","year":"2009","unstructured":"Argall, B.D., Chernova, S., Veloso, M., Browning, B.: A survey of robot learning from demonstration. Robot. Autonom. Syst. 57(5), 469\u2013483 (2009)","journal-title":"Robot. Autonom. Syst."},{"key":"19_CR10","doi-asserted-by":"publisher","first-page":"1995","DOI":"10.1007\/978-3-319-32552-1_74","volume-title":"Springer Handbook of Robotics","author":"AG Billard","year":"2016","unstructured":"Billard, A.G., Calinon, S., Dillmann, R.: Learning from humans. In: Siciliano, B., Khatib, O. (eds.) Springer Handbook of Robotics, pp. 1995\u20132014. Springer, Heidelberg (2016)"},{"key":"19_CR11","unstructured":"Bagnell, J.A.: An invitation to imitation. Technical Report CMU-RI-TR-15-08, Carnegie Mellon University, Pittsburgh, PA, March 2015"},{"key":"19_CR12","unstructured":"Ross, S., Bagnell, J.A.: Reinforcement and imitation learning via interactive no-regret learning. arXiv preprint arXiv:1406.5979 (2014)"},{"key":"19_CR13","unstructured":"Sun, W., Venkatraman, A., Gordon, G.J., Boots, B., Bagnell, J.A.: Deeply aggrevated: differentiable imitation learning for sequential prediction. In: Proceedings of the 34th International Conference on Machine Learning-Volume 70, pp. 3309\u20133318. JMLR. org (2017)"},{"key":"19_CR14","unstructured":"Sun, W., Bagnell, J.A., Boots, B.: Truncated horizon policy search: combining reinforcement learning & imitation learning. arXiv:1805.11240 (2018)"},{"key":"19_CR15","unstructured":"Cheng, C.A., Yan, X., Wagener, N., Boots, B.: Fast policy learning through imitation and reinforcement. arXiv:1805.10413 (2018)"},{"key":"19_CR16","doi-asserted-by":"crossref","unstructured":"Rajeswaran, A., Kumar, V., Gupta, A., Vezzani, G., Schulman, J., Todorov, E., Levine, S.: Learning complex dexterous manipulation with deep reinforcement learning and demonstrations. arXiv preprint arXiv:1709.10087 (2017)","DOI":"10.15607\/RSS.2018.XIV.049"},{"key":"19_CR17","unstructured":"Pomerleau, D.A.: Alvinn: an autonomous land vehicle in a neural network. In: Advances in Neural Information Processing Systems, pp. 305\u2013313 (1989)"},{"key":"19_CR18","unstructured":"Ross, S., Gordon, G., Bagnell, D.: A reduction of imitation learning and structured prediction to no-regret online learning. In: AISTATS (2011)"},{"key":"19_CR19","unstructured":"Kim, B., Farahmand, A.M., Pineau, J., Precup, D.: Learning from limited demonstrations. In: Advances in Neural Information Processing Systems, pp. 2859\u20132867 (2013)"},{"key":"19_CR20","doi-asserted-by":"crossref","unstructured":"Gupta, S., Davidson, J., Levine, S., Sukthankar, R., Malik, J.: Cognitive mapping and planning for visual navigation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.769"},{"key":"19_CR21","unstructured":"Laskey, M., Lee, J., Hsieh, W., Liaw, R., Mahler, J., Fox, R., Goldberg, K.: Iterative noise injection for scalable imitation learning. arXiv preprint arXiv:1703.09327 (2017)"},{"key":"19_CR22","doi-asserted-by":"crossref","unstructured":"Laskey, M., Staszak, S., Hsieh, W.Y.S., Mahler, J., Pokorny, F.T., Dragan, A.D. and Goldberg, K.: Shiv: reducing supervisor burden in dagger using support vectors for efficient learning from demonstrations in high dimensional state spaces. In: 2016 IEEE International Conference on Robotics and Automation (ICRA), pp. 462\u2013469. IEEE (2016)","DOI":"10.1109\/ICRA.2016.7487167"},{"key":"19_CR23","doi-asserted-by":"crossref","unstructured":"Laskey, M., Chuck, C., Lee, J., Mahler, J., Krishnan, S., Jamieson, K., Dragan, A., Goldberg, K.: Comparing human-centric and robot-centric sampling for robot deep learning from demonstrations. In: 2017 IEEE International Conference on Robotics and Automation (ICRA). IEEE (2017)","DOI":"10.1109\/ICRA.2017.7989046"},{"issue":"1","key":"19_CR24","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1007\/s10514-009-9121-3","volume":"27","author":"ND Ratliff","year":"2009","unstructured":"Ratliff, N.D., Silver, D., Bagnell, J.A.: Learning to search: functional gradient techniques for imitation learning. Autonom. Robots 27(1), 25\u201353 (2009)","journal-title":"Autonom. Robots"},{"key":"19_CR25","doi-asserted-by":"crossref","unstructured":"Ratliff, N.D., Bagnell, J.A., Zinkevich, M.A.: Maximum margin planning. In: International Conference on Machine Learning. ACM (2006)","DOI":"10.1145\/1143844.1143936"},{"issue":"8","key":"19_CR26","doi-asserted-by":"publisher","first-page":"1814","DOI":"10.1109\/TNNLS.2016.2543000","volume":"28","author":"B Piot","year":"2017","unstructured":"Piot, B., Geist, M., Pietquin, O.: Bridging the gap between imitation learning and inverse reinforcement learning. IEEE Trans. Neural Netw. Learn. Syst. 28(8), 1814\u20131826 (2017)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"19_CR27","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Ng, A.Y.: Apprenticeship learning via inverse reinforcement learning. In: International Conference on Machine Learning. ACM (2004)","DOI":"10.1145\/1015330.1015430"},{"key":"19_CR28","unstructured":"Ziebart, B.D., Maas, A.L., Bagnell, J.A., Dey, A.K.: Maximum entropy inverse reinforcement learning. In: AAAI (2008)"},{"key":"19_CR29","unstructured":"Wulfmeier, M., Ondruska, P., Posner, I.: Maximum entropy deep inverse reinforcement learning. arXiv preprint arXiv:1507.04888 (2015)"},{"key":"19_CR30","unstructured":"Syed, U., Schapire, R.E.: A game-theoretic approach to apprenticeship learning. In: Advances in Neural Information Processing Systems (2008)"},{"key":"19_CR31","unstructured":"Ho, J., Gupta, J., Ermon, S.: Model-free imitation learning with policy optimization. In: International Conference on Machine Learning (2016)"},{"key":"19_CR32","unstructured":"Finn, C., Christiano, P., Abbeel, P., Levine, S.: A connection between generative adversarial networks, inverse reinforcement learning, and energy-based models. arXiv preprint arXiv:1611.03852 (2016)"},{"key":"19_CR33","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., Bengio, Y.: Generative adversarial nets. In: Advances in Neural Information Processing Systems, pp. 2672\u20132680 (2014)"},{"key":"19_CR34","unstructured":"Blond\u00e9, L., Kalousis, A.: Sample-efficient imitation learning via generative adversarial nets. arXiv preprint arXiv:1809.02064 (2018)"},{"key":"19_CR35","unstructured":"Fu, J., Luo, K., Levine, S.: Learning robust rewards with adversarial inverse reinforcement learning. arXiv preprint arXiv:1710.11248 (2017)"},{"key":"19_CR36","unstructured":"Qureshi, A.H., Boots, B., Yip, M.C.: Adversarial imitation via variational inverse reinforcement learning. arXiv preprint arXiv:1809.06404 (2018)"},{"key":"19_CR37","unstructured":"Peng, X.B., Kanazawa, A., Toyer, S., Abbeel, P., Levine, S.: Variational discriminator bottleneck: improving imitation learning, inverse RL, and GANS by constraining information flow. arXiv preprint arXiv:1810.00821 (2018)"},{"key":"19_CR38","doi-asserted-by":"crossref","unstructured":"Torabi, F., Warnell, G., Stone, P.: Generative adversarial imitation from observation. arXiv preprint arXiv:1807.06158 (2018)","DOI":"10.24963\/ijcai.2018\/687"},{"key":"19_CR39","doi-asserted-by":"crossref","unstructured":"Torabi, F., Warnell, G., Stone, P.: Behavioral cloning from observation. arXiv preprint arXiv:1805.01954 (2018)","DOI":"10.24963\/ijcai.2018\/687"},{"key":"19_CR40","doi-asserted-by":"crossref","unstructured":"Peng, X.B., Kanazawa, A., Malik, J., Abbeel, P., Levine, S.: Sfv: reinforcement learning of physical skills from videos. In: SIGGRAPH Asia 2018 Technical Papers, p. 178. ACM (2018)","DOI":"10.1145\/3272127.3275014"},{"issue":"11","key":"19_CR41","doi-asserted-by":"publisher","first-page":"5847","DOI":"10.1109\/TIT.2010.2068870","volume":"56","author":"X Nguyen","year":"2010","unstructured":"Nguyen, X., Wainwright, M.J., Jordan, M.I.: Estimating divergence functionals and the likelihood ratio by convex risk minimization. IEEE Trans. Inf. Theory 56(11), 5847\u20135861 (2010)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"19_CR42","unstructured":"Boularias, A., Kober, J., Peters, J.: Relative entropy inverse reinforcement learning. In: Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics, pp. 182\u2013189 (2011)"},{"key":"19_CR43","doi-asserted-by":"crossref","unstructured":"Rhinehart, N., Kitani, K.M., Vernaza, P.: R2p2: a reparameterized pushforward policy for diverse, precise generative path forecasting. In: The European Conference on Computer Vision (ECCV), September 2018","DOI":"10.1007\/978-3-030-01261-8_47"},{"key":"19_CR44","unstructured":"Ghasemipour, S.K.S., Gu, S., Zemel, R.: Understanding the relation between maximum-entropy inverse reinforcement learning and behaviour cloning. In: Workshop ICLR (2018)"},{"key":"19_CR45","unstructured":"Babes, M., Marivate, V.N., Subramanian, K., Littman, M.L.: Apprenticeship learning about multiple intentions. In: International Conference on Machine Learning, pp. 897\u2013904 (2011)"},{"key":"19_CR46","doi-asserted-by":"crossref","unstructured":"Dimitrakakis, C., Rothkopf, C.A.: Bayesian multitask inverse reinforcement learning. In: European Workshop on Reinforcement Learning, pp. 273\u2013284. Springer (2011)","DOI":"10.1007\/978-3-642-29946-9_27"},{"key":"19_CR47","unstructured":"Chen, X., Duan, Y., Houthooft, R., Schulman, J., Sutskever, I., Abbeel, P.: Infogan: interpretable representation learning by information maximizing generative adversarial nets. In: Advances in Neural Information Processing Systems, pp. 2172\u20132180 (2016)"},{"key":"19_CR48","unstructured":"Hausman, K., Chebotar, Y., Schaal, S., Sukhatme, G., Lim, J.J.: Multi-modal imitation learning from unstructured demonstrations using generative adversarial nets. In: Advances in Neural Information Processing Systems, pp. 1235\u20131245 (2017)"},{"key":"19_CR49","unstructured":"Lee, K., Choi, S. and Oh, S.: Maximum causal tsallis entropy imitation learning. In: Advances in Neural Information Processing Systems (2018)"},{"issue":"3","key":"19_CR50","doi-asserted-by":"publisher","first-page":"1466","DOI":"10.1109\/LRA.2018.2800085","volume":"3","author":"K Lee","year":"2018","unstructured":"Lee, K., Choi, S., Oh, S.: Sparse Markov decision processes with causal sparse tsallis entropy regularization for reinforcement learning. IEEE Robot. Autom. Lett. 3(3), 1466\u20131473 (2018)","journal-title":"IEEE Robot. Autom. Lett."},{"key":"19_CR51","unstructured":"Belousov, B., Peters, J.: f-divergence constrained policy improvement. arXiv preprint arXiv:1801.00056 (2017)"},{"key":"19_CR52","doi-asserted-by":"publisher","DOI":"10.1561\/9781933019543","volume-title":"Information Theory and Statistics: A Tutorial","author":"I Csisz\u00e1r","year":"2004","unstructured":"Csisz\u00e1r, I., Shields, P.C.: Information Theory and Statistics: A Tutorial. Now Publishers Inc, Norwell (2004)"},{"issue":"10","key":"19_CR53","doi-asserted-by":"publisher","first-page":"4394","DOI":"10.1109\/TIT.2006.881731","volume":"52","author":"F Liese","year":"2006","unstructured":"Liese, F., Vajda, I.: On divergences and informations in statistics and information theory. IEEE Trans. Inf. Theory 52(10), 4394\u20134412 (2006)","journal-title":"IEEE Trans. Inf. Theory"},{"issue":"3","key":"19_CR54","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/s10994-011-5266-3","volume":"86","author":"T Kanamori","year":"2012","unstructured":"Kanamori, T., Suzuki, T., Sugiyama, M.: Statistical analysis of kernel-based least-squares density-ratio estimation. Mach. Learn. 86(3), 335\u2013367 (2012)","journal-title":"Mach. Learn."},{"key":"19_CR55","unstructured":"Zhang, M., Bird, T., Habib, R., Xu, T., Barber, D.: Variational f-divergence minimization. arXiv preprint arXiv:1907.11891 (2019)"},{"key":"19_CR56","unstructured":"Sun, W., Vemula, A., Boots, B., Bagnell, J.A.: Provably efficient imitation learning from observation alone. arXiv preprint arXiv:1905.10948 (2019)"}],"container-title":["Springer Proceedings in Advanced Robotics","Algorithmic Foundations of Robotics XIV"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-66723-8_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,16]],"date-time":"2022-12-16T01:28:42Z","timestamp":1671154122000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-66723-8_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030667221","9783030667238"],"references-count":56,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-66723-8_19","relation":{},"ISSN":["2511-1256","2511-1264"],"issn-type":[{"type":"print","value":"2511-1256"},{"type":"electronic","value":"2511-1264"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"9 February 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"WAFR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on the Algorithmic Foundations of Robotics","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Oulu","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Finland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 June 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 June 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"wafr2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/robotics.cs.rutgers.edu\/wafr2020\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}