{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T05:29:15Z","timestamp":1730266155096,"version":"3.28.0"},"reference-count":24,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1109\/ijcnn48605.2020.9207008","type":"proceedings-article","created":{"date-parts":[[2020,9,29]],"date-time":"2020-09-29T20:40:33Z","timestamp":1601412033000},"page":"1-7","source":"Crossref","is-referenced-by-count":2,"title":["Latent Context Based Soft Actor-Critic"],"prefix":"10.1109","author":[{"given":"Yuan","family":"Pu","sequence":"first","affiliation":[]},{"given":"Shaochen","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Xin","family":"Yao","sequence":"additional","affiliation":[]},{"given":"Bin","family":"Li","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","article-title":"Probabilistic model-agnostic meta-learning","author":"finn","year":"2018","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"article-title":"Brains, behavior, and robotics","year":"1981","author":"albus","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2008.10.024"},{"key":"ref13","article-title":"Algorithms for inverse reinforcement learning","author":"ng","year":"2000","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref14","article-title":"Extrapolating beyond suboptimal demonstrations via inverse reinforcement learning from observations","author":"brown","year":"2019","journal-title":"Proceedings of International Conference on Machine Learning (ICML)"},{"article-title":"Modeling purposeful adaptive behavior with the principle of maximum causal entropy","year":"2010","author":"ziebart","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-74690-4_71"},{"article-title":"Memory-based control with recurrent neural networks","year":"2015","author":"heess","key":"ref17"},{"article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","year":"2017","author":"finn","key":"ref18"},{"key":"ref19","article-title":"Meta reinforcement learning of structured exploration strategies","author":"gupta","year":"2018","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref4"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref6","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref5","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2016","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref8","article-title":"Efficient off-policy Meta-reinforcement learning via probabilistic context variables","author":"rakelly","year":"2019","journal-title":"Proceedings of International Conference on Machine Learning (ICML)"},{"article-title":"Deep recurrent Q-Learning for partially observable MDPs","year":"2015","author":"hausknecht","key":"ref7"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref9","article-title":"Learning an embedding space for transferable robot skills","author":"hausman","year":"2018","journal-title":"International Conference on Learning Representations (ICLR)"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref1"},{"key":"ref20","article-title":"Re-current experience replay in distributed reinforcement learning","author":"kapturowski","year":"2019","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref21","article-title":"Transfer learning for reinforcement learning domains: A survey","author":"taylor","year":"2009","journal-title":"The Journal of Machine Learning Research"},{"key":"ref24","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"Proc of the Int Conf on Learning Representations (ICLR)"},{"article-title":"OpenAI gym","year":"2016","author":"brockman","key":"ref23"}],"event":{"name":"2020 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2020,7,19]]},"location":"Glasgow, United Kingdom","end":{"date-parts":[[2020,7,24]]}},"container-title":["2020 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9200848\/9206590\/09207008.pdf?arnumber=9207008","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,28]],"date-time":"2022-06-28T17:52:28Z","timestamp":1656438748000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9207008\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/ijcnn48605.2020.9207008","relation":{},"subject":[],"published":{"date-parts":[[2020,7]]}}}