{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T02:37:56Z","timestamp":1730255876230,"version":"3.28.0"},"reference-count":49,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,5,23]]},"DOI":"10.1109\/icra46639.2022.9812312","type":"proceedings-article","created":{"date-parts":[[2022,7,12]],"date-time":"2022-07-12T19:36:40Z","timestamp":1657654600000},"page":"6386-6393","source":"Crossref","is-referenced-by-count":28,"title":["Offline Meta-Reinforcement Learning for Industrial Insertion"],"prefix":"10.1109","author":[{"given":"Tony Z.","family":"Zhao","sequence":"first","affiliation":[{"name":"Work done as an intern at X, The Moonshot Factory,Mountain View,CA,USA"}]},{"given":"Jianlan","family":"Luo","sequence":"additional","affiliation":[{"name":"Intrinsic Innovation LLC,Mountain View,CA,USA"}]},{"given":"Oleg","family":"Sushkov","sequence":"additional","affiliation":[{"name":"Deepmind,London,UK"}]},{"given":"Rugile","family":"Pevceviciute","sequence":"additional","affiliation":[{"name":"Deepmind,London,UK"}]},{"given":"Nicolas","family":"Heess","sequence":"additional","affiliation":[{"name":"Deepmind,London,UK"}]},{"given":"Jon","family":"Scholz","sequence":"additional","affiliation":[{"name":"Deepmind,London,UK"}]},{"given":"Stefan","family":"Schaal","sequence":"additional","affiliation":[{"name":"Work done as an intern at X, The Moonshot Factory,Mountain View,CA,USA"}]},{"given":"Sergey","family":"Levine","sequence":"additional","affiliation":[{"name":"Google Brain,Mountain View,CA,USA"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref39","DOI":"10.1109\/ROBOT.1995.525545"},{"doi-asserted-by":"publisher","key":"ref38","DOI":"10.1017\/S026357479900185X"},{"year":"2021","author":"pong","journal-title":"Offline Meta-Reinforcement Learning with Online Self-Supervision","key":"ref33"},{"key":"ref32","article-title":"Offline Meta Reinforcement Learning","volume":"abs 2008 2598","author":"dorfman","year":"2020","journal-title":"ArXiv"},{"key":"ref31","article-title":"A Minimalist Approach to Offline Reinforcement Learning","volume":"abs 2106 6860","author":"fujimoto","year":"2021","journal-title":"ArXiv"},{"doi-asserted-by":"publisher","key":"ref30","DOI":"10.1109\/ICRA40945.2020.9196935"},{"key":"ref37","article-title":"Continuous control with deep reinforcement learning","volume":"abs 1509 2971","author":"lillicrap","year":"2016","journal-title":"CoRR"},{"key":"ref36","article-title":"Efficient Fully-Offline Meta-Reinforcement Learning via Distance Metric Learning and Behavior Regularization","volume":"abs 2010 1112","author":"li","year":"2021","journal-title":"ArXiv"},{"key":"ref35","volume":"abs 2008 2598","author":"dorfman","year":"2020","journal-title":"CoRR"},{"key":"ref34","article-title":"Offline Meta-Reinforcement Learning with Advantage Weighting","author":"mitchell","year":"2021","journal-title":"ICML"},{"year":"2020","author":"singh","journal-title":"COG Connecting New Skills to Past Experience with Offline Reinforcement Learning","key":"ref28"},{"year":"2020","author":"nair","journal-title":"AWAC Accelerating Online Reinforcement Learning With Offline Datasets","key":"ref27"},{"key":"ref29","article-title":"Offline Meta Learning of Exploration","author":"dorfman","year":"0","journal-title":"arXiv Learning (2020)"},{"year":"2021","journal-title":"Introducing Intrinsic","key":"ref2"},{"year":"2021","journal-title":"Global Robotics Market - Growth Trends COVID-19 Im-pact and Forecasts (2021-2026)","key":"ref1"},{"year":"2016","author":"duan","journal-title":"RL2 Fast reinforcement learning via slow reinforcement learning","key":"ref20"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1109\/CVPR.2019.00691"},{"key":"ref21","volume":"abs 1803 11347","author":"clavera","year":"2018","journal-title":"CoRR"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.1109\/IROS45743.2020.9340848"},{"key":"ref23","volume":"abs 2010 13957","author":"zhao","year":"2020","journal-title":"CoRR"},{"year":"2020","author":"kumar","journal-title":"Conservative Q-Learning for Offline Reinforcement Learning","key":"ref26"},{"year":"2020","author":"levine","journal-title":"Offline reinforcement learning Tutorial review and perspectives on open problems","key":"ref25"},{"key":"ref10","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17 1","author":"levine","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref11","volume":"abs 1501 5611","author":"levine","year":"2015","journal-title":"CoRR"},{"doi-asserted-by":"publisher","key":"ref40","DOI":"10.1109\/IROS.2001.977187"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1109\/ICRA.2018.8460756"},{"key":"ref13","volume":"abs 1708 4033","author":"inoue","year":"2017","journal-title":"Deep Reinforcement Learning for High Pre-cision Assembly Tasks"},{"key":"ref14","article-title":"Learning from the Hindsight Plan - Episodic MPC Im-provement","author":"tamar","year":"2016","journal-title":"ArXiv e-prints"},{"key":"ref15","article-title":"Leveraging Demonstrations for Deep Reinforcement Learning on Robotics Problems with Sparse Rewards","volume":"abs 1707 8817","author":"vecerik","year":"2017","journal-title":"ArXiv"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1109\/ICRA.2018.8460696"},{"key":"ref17","first-page":"1126","article-title":"Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks","volume":"70","author":"finn","year":"0","journal-title":"Proceedings of the 34th International Conference on Machine Learning"},{"key":"ref18","first-page":"5331","article-title":"Ef-ficient Off-Policy Meta-Reinforcement Learning via Prob-abilistic Context Variables","volume":"97","author":"rakelly","year":"0","journal-title":"Proceedings of the 36th International Conference on Machine Learning"},{"key":"ref19","article-title":"Meta-Reinforcement Learning of Structured Exploration Strategies","author":"gupta","year":"0","journal-title":"NeurIPS 2018"},{"key":"ref4","first-page":"722","volume":"4 5","author":"wang","year":"2018","journal-title":"The Future of Manufacturing A New Per-spective"},{"year":"2021","journal-title":"Alphabet to launch robotics firm Intrinsic under its other bets unit","key":"ref3"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.15607\/RSS.2021.XVII.088"},{"key":"ref5","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","author":"vecerik","year":"2017","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1109\/ICRA.2019.8793506"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1109\/ICRA.2019.8794074"},{"key":"ref49","first-page":"357","article-title":"One-shot visual imitation learning via meta-learning","author":"finn","year":"2017","journal-title":"Conference on Robot Learning"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1109\/IROS45743.2020.9341714"},{"doi-asserted-by":"publisher","key":"ref46","DOI":"10.1109\/COASE.2016.7743375"},{"doi-asserted-by":"publisher","key":"ref45","DOI":"10.1109\/ROBOT.2001.932611"},{"key":"ref48","volume":"abs 1812 3201","author":"johannink","year":"2018","journal-title":"CoRR"},{"doi-asserted-by":"publisher","key":"ref47","DOI":"10.1109\/LRA.2021.3076971"},{"key":"ref42","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1115\/1.3149634","volume":"104 1","author":"whitney","year":"1982","journal-title":"Journal of Dynamic Systems Measurement and Control"},{"doi-asserted-by":"publisher","key":"ref41","DOI":"10.1115\/1.3427095"},{"doi-asserted-by":"publisher","key":"ref44","DOI":"10.1109\/CARE.2013.6733716"},{"key":"ref43","first-page":"1","article-title":"Intuitive peg-in-hole assembly strategy with a compliant manipula-tor","author":"park","year":"2013","journal-title":"IEEE ISR 2013"}],"event":{"name":"2022 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2022,5,23]]},"location":"Philadelphia, PA, USA","end":{"date-parts":[[2022,5,27]]}},"container-title":["2022 International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9811522\/9811357\/09812312.pdf?arnumber=9812312","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,3]],"date-time":"2022-11-03T23:05:05Z","timestamp":1667516705000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9812312\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,23]]},"references-count":49,"URL":"https:\/\/doi.org\/10.1109\/icra46639.2022.9812312","relation":{},"subject":[],"published":{"date-parts":[[2022,5,23]]}}}