{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T20:34:44Z","timestamp":1725741284573},"reference-count":70,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1109\/cvpr52729.2023.01323","type":"proceedings-article","created":{"date-parts":[[2023,8,22]],"date-time":"2023-08-22T13:30:52Z","timestamp":1692711052000},"page":"13767-13777","source":"Crossref","is-referenced-by-count":1,"title":["Galactic: Scaling End-to-End Reinforcement Learning for Rearrangement at 100k Steps-Per-Second"],"prefix":"10.1109","author":[{"given":"Vincent-Pierre","family":"Berges","sequence":"first","affiliation":[{"name":"Meta AI (FAIR)"}]},{"given":"Andrew","family":"Szot","sequence":"additional","affiliation":[{"name":"Georgia Tech"}]},{"given":"Devendra Singh","family":"Chaplot","sequence":"additional","affiliation":[{"name":"Meta AI (FAIR)"}]},{"given":"Aaron","family":"Gokaslan","sequence":"additional","affiliation":[{"name":"Cornell University"}]},{"given":"Roozbeh","family":"Mottaghi","sequence":"additional","affiliation":[{"name":"Meta AI (FAIR)"}]},{"given":"Dhruv","family":"Batra","sequence":"additional","affiliation":[{"name":"Meta AI (FAIR), Georgia Tech"}]},{"given":"Eric","family":"Undersander","sequence":"additional","affiliation":[{"name":"Meta AI (FAIR)"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811809"},{"key":"ref57","article-title":"Rethinking sim2real: Lower fidelity simulation leads to higher sim2real transfer in navigation","author":"truong","year":"2022","journal-title":"CoRL"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"journal-title":"Habitat rearrangement challenge 2022","year":"2022","author":"szot","key":"ref56"},{"key":"ref15","article-title":"Impala: Scalable distributed deep-rl with importance weighted actor-learner architectures","author":"espeholt","year":"2018","journal-title":"ICML"},{"key":"ref59","article-title":"Ver: Scaling on-policy rl leads to the emergence of navigation in embodied rearrangement","author":"wijmans","year":"2022","journal-title":"ArXiv"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00447"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00586"},{"key":"ref53","article-title":"Behavior: Benchmark for everyday household activities in virtual, interactive, and ecological environments","author":"srivastava","year":"2021","journal-title":"CoRL"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01075"},{"key":"ref11","article-title":"Procthor: Large-scale embodied ai using procedural generation","author":"deitke","year":"2022","journal-title":"NeurIPS"},{"key":"ref55","article-title":"Habitat 2.0: Training home assistants to rearrange their habitat","author":"szot","year":"2021","journal-title":"NeurIPS"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00323"},{"key":"ref54","article-title":"Accelerated methods for deep reinforcement learning","author":"stooke","year":"2018","journal-title":"ArXiv"},{"key":"ref17","article-title":"Threedworld: A platform for interactive multi- modal physical simulation","author":"gan","year":"2021","journal-title":"NeurIPS (dataset track)"},{"key":"ref16","article-title":"Brax - a differentiable physics engine for large scale rigid body simulation","author":"freeman","year":"2021","journal-title":"NeurIPS Datasets and Benchmarks Track"},{"key":"ref19","article-title":"Multi-skill mobile manipulation for object rearrangement","author":"gu","year":"2023","journal-title":"ICLRE"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00430"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636667"},{"key":"ref50","article-title":"Loss is its own reward: Self-supervision for reinforcement learning","author":"shelhamer","year":"2017","journal-title":"ArXiv"},{"key":"ref46","article-title":"Mid-level visual representations improve generalization and sample efficiency for learning visuomotor policies","author":"sax","year":"2018","journal-title":"ArXiv"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00943"},{"key":"ref48","article-title":"Large batch simulation for deep reinforcement learning","author":"shacklett","year":"2021","journal-title":"ICLRE"},{"key":"ref47","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"ArXiv"},{"key":"ref42","article-title":"Zero-shot text-to-image generation","author":"ramesh","year":"2021","journal-title":"ICML"},{"key":"ref41","article-title":"Habitat-matterport 3d dataset (hm3d): 1000 large-scale 3d environments for embodied ai","author":"santhosh","year":"2021","journal-title":"ArXiv"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"journal-title":"Fetcher","year":"0","author":"robotics","key":"ref43"},{"key":"ref49","article-title":"RRL: resnet as representation for reinforcement learning","author":"shah","year":"2021","journal-title":"ICML"},{"key":"ref8","article-title":"Embodied question answering","author":"das","year":"2018","journal-title":"CVPR"},{"journal-title":"Pybullet a python module for physics simulation for games robotics and machine learning","year":"0","author":"coumans","key":"ref7"},{"key":"ref9","article-title":"Pilco: A model-based and data-efficient approach to policy search","author":"deisenroth","year":"2011","journal-title":"ICML"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICAR.2015.7251504"},{"key":"ref3","article-title":"Language models are few-shot learners","author":"brown","year":"2020","journal-title":"ArXiv"},{"key":"ref6","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","author":"chua","year":"2018","journal-title":"NeurIPS"},{"key":"ref5","article-title":"Object goal navigation using goal-oriented semantic exploration","author":"chaplot","year":"2020","journal-title":"NeurIPS"},{"key":"ref40","article-title":"Language models are unsupervised multitask learners","author":"radford","year":"2019","journal-title":"ArXiv"},{"key":"ref35","article-title":"Massively parallel methods for deep reinforcement learning","author":"nair","year":"2015","journal-title":"ArXiv"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref37","article-title":"Megaverse: Simulating embodied agents at one million experiences per second","author":"petrenko","year":"2021","journal-title":"ICML"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref31","article-title":"Learning to navigate in complex environments","author":"mirowski","year":"2017","journal-title":"ICLRE"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01509"},{"key":"ref33","article-title":"Data-efficient hierarchical reinforcement learning","author":"nachum","year":"2018","journal-title":"NeurIPS"},{"key":"ref32","article-title":"Safe and efficient off-policy reinforcement learning","author":"munos","year":"2016","journal-title":"NeurIPS"},{"key":"ref2","article-title":"Rearrangement: A challenge for embodied ai","author":"batra","year":"2020","journal-title":"ArXiv"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00387"},{"key":"ref39","article-title":"Learning transferable visual models from natural language supervision","author":"radford","year":"2021","journal-title":"ArXiv"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00886"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981055"},{"key":"ref24","article-title":"Distributed prioritized experience replay","author":"horgan","year":"2018","journal-title":"ICLRE"},{"key":"ref68","article-title":"Auxiliary tasks speed up learning pointgoal navigation","author":"ye","year":"2020","journal-title":"CoRL"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref67","article-title":"Menger: Massively large-scale distributed reinforcement learning","author":"yazdanbakhsh","year":"0","journal-title":"NeurIPS Beyond Backpropagation Workshop"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2016.7860433"},{"key":"ref25","article-title":"Reinforcement learning with unsupervised auxiliary tasks","author":"jaderberg","year":"2017","journal-title":"ICLRE"},{"key":"ref69","article-title":"Learning to see before learning to act: Visual pre-training for manipulation","author":"lin","year":"2020","journal-title":"ICRA"},{"key":"ref20","article-title":"Continuous deep q-learning with model-based acceleration","author":"gu","year":"2016","journal-title":"ICML"},{"key":"ref64","article-title":"Accelerated policy learning with parallel differentiable simulation","author":"xu","year":"2022","journal-title":"ICLRE"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01111"},{"key":"ref22","article-title":"Learning latent dynamics for planning from pixels","author":"hafner","year":"2019","journal-title":"ICML"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17276"},{"key":"ref21","article-title":"Mastering atari with discrete world models","author":"hafner","year":"2021","journal-title":"ICLRE"},{"key":"ref65","article-title":"Offline visual representation learning for embodied navigation","author":"yadav","year":"2022","journal-title":"ArXiv"},{"key":"ref28","article-title":"igibson 2.0: Object-centric simulation for robot learning of everyday household tasks","author":"li","year":"2021","journal-title":"ArXiv"},{"key":"ref27","article-title":"Ai2-thor: An interactive 3d environment for visual ai","author":"kolve","year":"2017","journal-title":"ArXiv"},{"key":"ref29","article-title":"Isaac gym: High performance gpu-based physics simulation for robot learning","author":"makoviychuk","year":"2021","journal-title":"ArXiv"},{"key":"ref60","article-title":"Dd-ppo: Learning near-perfect pointgoal navigators from 2.5 billion frames","author":"wijmans","year":"2020","journal-title":"ICLRE"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00945"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00691"}],"event":{"name":"2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","start":{"date-parts":[[2023,6,17]]},"location":"Vancouver, BC, Canada","end":{"date-parts":[[2023,6,24]]}},"container-title":["2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10203037\/10203050\/10203914.pdf?arnumber=10203914","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,11]],"date-time":"2023-09-11T13:55:25Z","timestamp":1694440525000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10203914\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6]]},"references-count":70,"URL":"https:\/\/doi.org\/10.1109\/cvpr52729.2023.01323","relation":{},"subject":[],"published":{"date-parts":[[2023,6]]}}}