{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,8]],"date-time":"2025-04-08T05:23:58Z","timestamp":1744089838515,"version":"3.40.3"},"reference-count":63,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10610040","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"16961-16969","source":"Crossref","is-referenced-by-count":6,"title":["SERL: A Software Suite for Sample-Efficient Robotic Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Jianlan","family":"Luo","sequence":"first","affiliation":[{"name":"University of California,Department of EECS,Berkeley"}]},{"given":"Zheyuan","family":"Hu","sequence":"additional","affiliation":[{"name":"University of California,Department of EECS,Berkeley"}]},{"given":"Charles","family":"Xu","sequence":"additional","affiliation":[{"name":"University of California,Department of EECS,Berkeley"}]},{"given":"You Liang","family":"Tan","sequence":"additional","affiliation":[{"name":"University of California,Department of EECS,Berkeley"}]},{"given":"Jacob","family":"Berg","sequence":"additional","affiliation":[{"name":"University of Washington,Department of Computer 
Science"}]},{"given":"Archit","family":"Sharma","sequence":"additional","affiliation":[{"name":"Stanford University,Department of Computer Science"}]},{"given":"Stefan","family":"Schaal","sequence":"additional","affiliation":[{"name":"Intrinsic Innovation LLC"}]},{"given":"Chelsea","family":"Finn","sequence":"additional","affiliation":[{"name":"Stanford University,Department of Computer Science"}]},{"given":"Abhishek","family":"Gupta","sequence":"additional","affiliation":[{"name":"University of Washington,Department of Computer Science"}]},{"given":"Sergey","family":"Levine","sequence":"additional","affiliation":[{"name":"University of California,Department of EECS,Berkeley"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/tro.2022.3176207"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/icra48506.2021.9561384"},{"key":"ref3","article-title":"MT-Opt: Continuous Multi-Task Robotic Reinforcement Learning at Scale","volume-title":"CoRR","author":"Kalashnikov","year":"2021"},{"issue":"1","key":"ref4","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"Levine","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1177\/0278364917710318"},{"key":"ref6","article-title":"Dex-Net 2.0: Deep Learning to Plan Robust Grasps with Synthetic Point Clouds and Analytic Grasp Metrics","volume-title":"Robotics: Science and Systems XIII, Massachusetts Institute of Technology, Cambridge, Massachusetts, USA, July 12-16, 2017.","author":"Mahler","year":"2017"},{"article-title":"Leave no Trace: Learning to Reset for Safe and Autonomous Reinforcement Learning","volume-title":"6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track 
Proceedings","author":"Eysenbach","key":"ref7"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-009-9120-4"},{"key":"ref9","article-title":"Lyapunov design for robust and efficient robotic reinforcement learning","author":"Westenbroek","year":"2022","journal-title":"arXiv preprint arXiv:2208.06721"},{"key":"ref10","first-page":"1","article-title":"Data efficient reinforcement learning for legged robots","volume-title":"Conference on Robot Learning","author":"Yang"},{"article-title":"A framework for efficient robotic manipulation","volume-title":"Deep RL Workshop NeurIPS 2021","author":"Zhan","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TIE.2020.3038072"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560764"},{"key":"ref14","article-title":"Data-efficient deep reinforcement learning for dexterous manipulation","author":"Popov","year":"2017","journal-title":"arXiv preprint arXiv:1704.03073"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793506"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812312"},{"key":"ref17","article-title":"REBOOT: Reuse Data for Bootstrapping Efficient Real-World Dexterous Manipulation","author":"Hu","year":"2024","journal-title":"arXiv preprint arXiv:2309.03322"},{"key":"ref18","article-title":"Residual Reinforcement Learning for Robot Control","volume-title":"CoRR","author":"Johannink","year":"2018"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341714"},{"key":"ref20","article-title":"Demonstrating A Walk in the Park: Learning to Walk in 20 Minutes With Model-Free Reinforcement Learning","volume-title":"Robotics: Science and Systems XIX, Daegu, Republic of Korea, July 10-14, 2023.","author":"Kostrikov","year":"2023"},{"key":"ref21","article-title":"RLIF: Interactive Imitation Learning as Reinforcement Learning","author":"Luo","year":"2023","journal-title":"arXiv preprint 
arXiv:2311.12996"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.021"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-012-5322-7"},{"key":"ref24","first-page":"2226","article-title":"DayDreamer: World Models for Physical Robot Learning","volume-title":"Conference on Robot Learning, CoRL 2022, 14-18 December 2022, Auckland, New Zealand","volume":"205","author":"Wu"},{"key":"ref25","first-page":"1101","article-title":"Deep Dynamics Models for Learning Dexterous Manipulation","volume-title":"3rd Annual Conference on Robot Learning, CoRL 2019, Osaka, Japan, October 30 - November 1, 2019, Proceedings","volume":"100","author":"Nagabandi"},{"key":"ref26","first-page":"1154","article-title":"Offline Reinforcement Learning from Images with Latent Space Models","volume-title":"Proceedings of the 3rd Annual Conference on Learning for Dynamics and Control, L4DC 2021, 7-8 June 2021, Virtual Event, Switzerland","volume":"144","author":"Rafailov"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8594353"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/icra.2019.8794102"},{"key":"ref29","article-title":"Variational inverse control with events: A general framework for data-driven reward definition","volume":"31","author":"Fu","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref30","first-page":"6346","article-title":"MURAL: Meta-Learning Uncertainty-Aware Rewards for Outcome-Driven Reinforcement Learning","volume-title":"Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18-24 July 2021, Virtual Event","volume":"139","author":"Li"},{"key":"ref31","article-title":"Vision-language models as success detectors","author":"Du","year":"2023","journal-title":"arXiv preprint arXiv:2303.07280"},{"key":"ref32","first-page":"14743","article-title":"Zero-Shot Reward Specification via Grounded Natural Language","volume-title":"International 
Conference on Machine Learning, ICML 2022, 17-23 July 2022, Baltimore, Maryland, USA","volume":"162","author":"Mahmoudieh"},{"key":"ref33","article-title":"MineDojo: Building Open-Ended Embodied Agents with Internet-Scale Knowledge","volume-title":"NeurIPS.","author":"Fan","year":"2022"},{"article-title":"VIP: Towards Universal Visual Reward and Representation via Value-Implicit Pre-Training","volume-title":"The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1-5, 2023","author":"Ma","key":"ref34"},{"key":"ref35","first-page":"23301","article-title":"LIV: Language-Image Representations and Rewards for Robotic Control","volume-title":"International Conference on Machine Learning, ICML 2023, 23-29 July 2023, Honolulu, Hawaii, USA","volume":"202","author":"Ma"},{"key":"ref36","article-title":"Autonomous Reinforcement Learning: Benchmarking and Formalism","author":"Sharma","year":"2021","journal-title":"arXiv preprint arXiv:2112.09605"},{"article-title":"The Ingredients of Real World Robotic Reinforcement Learning","volume-title":"8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, April 26-30, 2020","author":"Zhu","key":"ref37"},{"key":"ref38","article-title":"When to Ask for Help: Proactive Interventions in Autonomous Reinforcement Learning","volume-title":"NeurIPS.","author":"Xie","year":"2022"},{"key":"ref39","article-title":"Self-Improving Robots: End-to-End Autonomous Visuomotor Reinforcement Learning","volume-title":"CoRR","author":"Sharma","year":"2023"},{"issue":"315","key":"ref40","first-page":"1","article-title":"d3rlpy: An Offline Deep Reinforcement Learning Library","volume-title":"Journal of Machine Learning Research","volume":"23","author":"Seno","year":"2022"},{"article-title":"rlkit","volume-title":"Github","author":"Nair","key":"ref41"},{"volume-title":"Stable Baselines.","year":"2018","author":"Hill","key":"ref42"},{"volume-title":"TF-Agents: A library for 
Reinforcement Learning in TensorFlow.","year":"2018","author":"Guadarrama","key":"ref43"},{"key":"ref44","article-title":"Efficient online reinforcement learning with offline data","author":"Ball","year":"2023","journal-title":"arXiv preprint arXiv:2302.02948"},{"key":"ref45","first-page":"1094","article-title":"Meta-World: A Benchmark and Evaluation for Multi-Task and Meta Reinforcement Learning","volume-title":"3rd Annual Conference on Robot Learning, CoRL 2019, Osaka, Japan, October 30 - November 1, 2019, Proceedings","volume":"100","author":"Yu"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/lra.2020.2974707"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3270034"},{"key":"ref48","first-page":"1300","article-title":"ROBEL: Robotics Benchmarks for Learning with Low-Cost Robots","volume-title":"3rd Annual Conference on Robot Learning, CoRL 2019, Osaka, Japan, October 30 - November 1, 2019, Proceedings","volume":"100","author":"Ahn"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2023.xix.041"},{"key":"ref50","first-page":"39:1","article-title":"End-to-End Training of Deep Visuomotor Policies","volume-title":"J. Mach. Learn. 
Res.","volume":"17","author":"Levine","year":"2016"},{"key":"ref51","first-page":"1008","article-title":"Actor-Critic Algorithms","volume-title":"Advances in Neural Information Processing Systems 12, [NIPS Conference, Denver, Colorado, USA, November 29 - December 4, 1999]","author":"Konda"},{"key":"ref52","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"International Conference on machine learning","author":"Haarnoja"},{"key":"ref53","first-page":"1582","article-title":"Addressing Function Approximation Error in Actor-Critic Methods","volume-title":"Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, July 10-15, 2018","volume":"80","author":"Fujimoto"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.049"},{"journal-title":"Accelerating Online Reinforcement Learning with Offline Datasets.","year":"2020","author":"Nair","key":"ref55"},{"key":"ref56","article-title":"Playing atari with deep reinforcement learning","author":"Mnih","year":"2013","journal-title":"arXiv preprint arXiv:1312.5602"},{"key":"ref57","article-title":"Generative adversarial nets","volume":"27","author":"Goodfellow","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2015.7354297"},{"key":"ref59","article-title":"Openai gym","author":"Brockman","year":"2016","journal-title":"arXiv preprint arXiv:1606.01540"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/icra.2019.8794074"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2021.xvii.088"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3076971"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2016.90"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation 
(ICRA)","start":{"date-parts":[[2024,5,13]]},"location":"Yokohama, Japan","end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10610040.pdf?arnumber=10610040","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T17:54:26Z","timestamp":1743789266000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10610040\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":63,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10610040","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}