{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T07:58:20Z","timestamp":1725695900886},"reference-count":40,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"8","license":[{"start":{"date-parts":[[2017,8,1]],"date-time":"2017-08-01T00:00:00Z","timestamp":1501545600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2017,8]]},"DOI":"10.1109\/tnnls.2016.2543000","type":"journal-article","created":{"date-parts":[[2016,5,4]],"date-time":"2016-05-04T18:30:01Z","timestamp":1462386601000},"page":"1814-1826","source":"Crossref","is-referenced-by-count":51,"title":["Bridging the Gap Between Imitation Learning and Inverse Reinforcement Learning"],"prefix":"10.1109","volume":"28","author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-6456-7183","authenticated-orcid":false,"given":"Bilal","family":"Piot","sequence":"first","affiliation":[]},{"given":"Matthieu","family":"Geist","sequence":"additional","affiliation":[]},{"given":"Olivier","family":"Pietquin","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","first-page":"1107","article-title":"Least-squares policy iteration","volume":"4","author":"lagoudakis","year":"2003","journal-title":"J Mach Learn Res"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143936"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/1102351.1102464"},{"key":"ref32","first-page":"349","article-title":"Analysis of inverse reinforcement learning with perturbed demonstrations","author":"melo","year":"2010","journal-title":"Proc ECAI"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1023\/A:1018946025316"},{"key":"ref30","first-page":"568","article-title":"Error propagation for approximate policy and value iteration","author":"farahmand","year":"2010","journal-title":"Proc NIPS"},{"key":"ref37","author":"vapnik","year":"1998","journal-title":"Statistical Learning Theory"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-29946-9_28"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-006-6226-1"},{"key":"ref34","author":"breiman","year":"1993","journal-title":"Classification and Regression Trees"},{"key":"ref10","first-page":"2253","article-title":"A reduction from apprenticeship learning to classification","author":"syed","year":"2010","journal-title":"Proc NIPS"},{"key":"ref40","first-page":"503","article-title":"Tree-based batch mode reinforcement learning","volume":"6","author":"ernst","year":"2005","journal-title":"J Mach Learn Res"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/279943.279964"},{"key":"ref12","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","author":"ng","year":"2000","journal-title":"Proc ICML"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21236\/ADA528601"},{"key":"ref14","first-page":"1414","article-title":"Imitation learning in relational domains: A functional-gradient boosting approach","author":"natarajan","year":"2011","journal-title":"Proc AAAI"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-40988-2_2"},{"key":"ref16","first-page":"1249","article-title":"Boosted and reward-regularized classification for apprenticeship learning","author":"piot","year":"2014","journal-title":"Proc AAMAS"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15883-4_25"},{"key":"ref18","first-page":"162","article-title":"Metrics for finite Markov decision processes","author":"ferns","year":"2004","journal-title":"Proc UAI"},{"key":"ref19","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","author":"ross","year":"2011","journal-title":"Proc AISTATS"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-40988-2_1"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2008.10.024"},{"key":"ref27","first-page":"1007","article-title":"Inverse reinforcement learning through structured classification","author":"klein","year":"2012","journal-title":"Proc NIPS"},{"key":"ref3","first-page":"1040","article-title":"Learning from demonstration","author":"schaal","year":"1997","journal-title":"Proc NIPS"},{"key":"ref6","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","author":"puterman","year":"1994","journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.2307\/2584329"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/2493525.2493529"},{"key":"ref8","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref7","volume":"1","author":"bertsekas","year":"1995","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref2","first-page":"12","article-title":"Robot learning from demonstration","author":"atkeson","year":"1997","journal-title":"Proc ICML"},{"key":"ref9","first-page":"1","article-title":"Efficient reductions for imitation learning","author":"ross","year":"2010","journal-title":"Proc 13th AISTATS"},{"key":"ref1","first-page":"305","article-title":"ALVINN: An autonomous land vehicle in a neural network","author":"pomerleau","year":"1989","journal-title":"Proc NIPS"},{"key":"ref20","first-page":"19","article-title":"Active imitation learning via reduction to I.I.D. active learning","author":"judah","year":"2012","journal-title":"Proc UAI"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref21","first-page":"1","article-title":"Inverse reinforcement learning in relational domains","author":"munzer","year":"2015","journal-title":"Proc IJCAI"},{"key":"ref24","first-page":"1433","article-title":"Maximum entropy inverse reinforcement learning","author":"ziebart","year":"2008","journal-title":"Proc AAAI"},{"key":"ref23","first-page":"1","article-title":"A game-theoretic approach to apprenticeship learning","author":"syed","year":"2008","journal-title":"Proc NIPS"},{"key":"ref26","first-page":"182","article-title":"Relative entropy inverse reinforcement learning","author":"boularias","year":"2011","journal-title":"Proc AISTATS"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-009-5110-1"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/7982830\/07464854.pdf?arnumber=7464854","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,19]],"date-time":"2022-06-19T21:39:34Z","timestamp":1655674774000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7464854\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,8]]},"references-count":40,"journal-issue":{"issue":"8"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2016.2543000","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,8]]}}}