{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T08:28:16Z","timestamp":1743064096994,"version":"3.37.3"},"reference-count":52,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2021,2,1]],"date-time":"2021-02-01T00:00:00Z","timestamp":1612137600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,2,1]],"date-time":"2021-02-01T00:00:00Z","timestamp":1612137600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,2,1]],"date-time":"2021-02-01T00:00:00Z","timestamp":1612137600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"Key Projects of Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61533012","91748120","61521063"],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cybern."],"published-print":{"date-parts":[[2021,2]]},"DOI":"10.1109\/tcyb.2019.2949596","type":"journal-article","created":{"date-parts":[[2019,11,12]],"date-time":"2019-11-12T22:03:44Z","timestamp":1573596224000},"page":"1056-1069","source":"Crossref","is-referenced-by-count":35,"title":["Task-Oriented Deep Reinforcement Learning for Robotic Skill Acquisition and Control"],"prefix":"10.1109","volume":"51","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4934-4237","authenticated-orcid":false,"given":"Guofei","family":"Xiang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6931-5842","authenticated-orcid":false,"given":"Jianbo","family":"Su","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","first-page":"1433","article-title":"Maximum entropy inverse reinforcement learning","volume":"8","author":"ziebart","year":"2008","journal-title":"Proc AAAI Conf Artif Intell"},{"doi-asserted-by":"publisher","key":"ref38","DOI":"10.1145\/1143844.1143936"},{"doi-asserted-by":"publisher","key":"ref33","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref32","first-page":"2721","article-title":"Count-based exploration with neural density models","author":"ostrovski","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"},{"doi-asserted-by":"publisher","key":"ref31","DOI":"10.1016\/j.artint.2015.02.001"},{"key":"ref30","first-page":"1109","article-title":"VIME: Variational information maximizing exploration","author":"houthooft","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"doi-asserted-by":"publisher","key":"ref37","DOI":"10.1145\/1015330.1015430"},{"key":"ref36","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","volume":"1","author":"ng","year":"2000","journal-title":"Proc 17th Int Conf Mach Learn"},{"year":"2015","author":"bagnell","article-title":"An invitation to imitation","key":"ref35"},{"key":"ref34","first-page":"3223","article-title":"Deep Q-learning from demonstrations","author":"hester","year":"2018","journal-title":"Proc 32nd AAAI Conf Artif Intell"},{"key":"ref28","first-page":"2775","article-title":"Bridging the gap between value and policy based reinforcement 
learning","author":"nachum","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref27","first-page":"1856","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"Proc 35th Int Conf Mach Learn"},{"key":"ref29","first-page":"2094","article-title":"Deep reinforcement learning with double Q-learning","author":"van hasselt","year":"2016","journal-title":"Proc 30th AAAI Conf Artif Intell"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1177\/0278364913495721"},{"year":"2018","author":"sutton","journal-title":"Reinforcement Learning An Introduction","key":"ref1"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of Go with deep neural networks and tree search","volume":"529","author":"silver","year":"2016","journal-title":"Nature"},{"key":"ref22","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"arXiv preprint arXiv 1509 02971"},{"key":"ref21","first-page":"1329","article-title":"Benchmarking deep reinforcement learning for continuous control","author":"duan","year":"2016","journal-title":"Proc 33rd Int Conf Mach Learn"},{"key":"ref24","first-page":"1054","article-title":"Safe and efficient off-policy reinforcement learning","author":"munos","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref23","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"Proc 33rd Int Conf Mach Learn"},{"key":"ref26","first-page":"1352","article-title":"Reinforcement learning with deep energy-based policies","author":"haarnoja","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref25","first-page":"3207","article-title":"Deep reinforcement learning that matters","author":"henderson","year":"2018","journal-title":"Proc 32nd AAAI Conf Artif Intell"},{"doi-asserted-by":"publisher","key":"ref50","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref51","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"arXiv preprint arXiv 1412 6980"},{"key":"ref52","article-title":"OpenAI gym","author":"brockman","year":"2016","journal-title":"arXiv preprint arXiv 1606 01540"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.15607\/RSS.2018.XIV.009"},{"key":"ref11","article-title":"Dexterous manipulation with deep reinforcement learning: Efficient, general, and low-cost","author":"zhu","year":"2018","journal-title":"arXiv preprint arXiv 1810 06008"},{"doi-asserted-by":"publisher","key":"ref40","DOI":"10.1007\/s10514-009-9121-3"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1016\/j.robot.2008.10.024"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.1126\/scirobotics.aau4984"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1177\/0278364917710318"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1109\/ICRA.2017.7989202"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1109\/TIV.2016.2578706"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1177\/0278364910371999"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1109\/ICRA.2016.7487175"},{"key":"ref19","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement 
learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1145\/3054912"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2300000021","article-title":"A survey on policy search for robotics","volume":"2","author":"deisenroth","year":"2013","journal-title":"Foundations and Trends in Robotics"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep learning","volume":"521","author":"lecun","year":"2015","journal-title":"Nature"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1561\/2300000053"},{"key":"ref8","first-page":"1889","article-title":"Trust region policy optimization","volume":"37","author":"schulman","year":"2015","journal-title":"Proc 31st Int Conf Mach Learn"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1126\/scirobotics.aau5872"},{"key":"ref49","first-page":"2058","article-title":"Learning to search better than your teacher","author":"chang","year":"2015","journal-title":"Proc 32nd Int Conf Mach Learn"},{"key":"ref9","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"J Mach Learn Res"},{"key":"ref46","article-title":"Reinforcement and imitation learning via interactive no-regret learning","author":"ross","year":"2014","journal-title":"arXiv preprint arXiv 1406 5979"},{"key":"ref45","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","author":"ross","year":"2011","journal-title":"Proc 14th Int Conf Artificial Intell"},{"doi-asserted-by":"publisher","key":"ref48","DOI":"10.1007\/s10994-009-5106-x"},{"key":"ref47","first-page":"3309","article-title":"Deeply aggreVaTeD: Differentiable imitation learning for sequential prediction","author":"sun","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref42","first-page":"4565","article-title":"Generative adversarial imitation learning","author":"ho","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref41","first-page":"2672","article-title":"Generative adversarial nets","author":"goodfellow","year":"2014","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref44","first-page":"3812","article-title":"InfoGAIL: Interpretable imitation learning from visual demonstrations","author":"li","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref43","first-page":"1235","article-title":"Multi-modal imitation learning from unstructured demonstrations using generative adversarial nets","author":"hausman","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"}],"container-title":["IEEE Transactions on 
Cybernetics"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6221036\/9325889\/08897016.pdf?arnumber=8897016","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:53:35Z","timestamp":1652194415000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8897016\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,2]]},"references-count":52,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tcyb.2019.2949596","relation":{},"ISSN":["2168-2267","2168-2275"],"issn-type":[{"type":"print","value":"2168-2267"},{"type":"electronic","value":"2168-2275"}],"subject":[],"published":{"date-parts":[[2021,2]]}}}