{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,12,27]],"date-time":"2024-12-27T05:15:47Z","timestamp":1735276547832,"version":"3.32.0"},"reference-count":41,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,10,14]],"date-time":"2024-10-14T00:00:00Z","timestamp":1728864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,14]],"date-time":"2024-10-14T00:00:00Z","timestamp":1728864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,10,14]]},"DOI":"10.1109\/iros58592.2024.10801683","type":"proceedings-article","created":{"date-parts":[[2024,12,25]],"date-time":"2024-12-25T19:17:39Z","timestamp":1735154259000},"page":"8539-8546","source":"Crossref","is-referenced-by-count":0,"title":["From LLMs to Actions: Latent Codes as Bridges in Hierarchical Robot Control"],"prefix":"10.1109","author":[{"given":"Yide","family":"Shentu","sequence":"first","affiliation":[{"name":"University of California,Berkeley"}]},{"given":"Philipp","family":"Wu","sequence":"additional","affiliation":[{"name":"University of California,Berkeley"}]},{"given":"Aravind","family":"Rajeswaran","sequence":"additional","affiliation":[{"name":"University of California,Berkeley"}]},{"given":"Pieter","family":"Abbeel","sequence":"additional","affiliation":[{"name":"University of California,Berkeley"}]}],"member":"263","reference":[{"journal-title":"Rt-2: Vision-language-action models transfer web knowledge to robotic control","year":"2023","author":"B","key":"ref1"},{"journal-title":"Moka: Open-vocabulary robotic manipulation through mark-based visual prompting","year":"2024","author":"Liu","key":"ref2"},{"volume-title":"Language-conditioned imitation learning for robot manipulation tasks","year":"2020","author":"Stepputtis","key":"ref3"},{"key":"ref4","first-page":"39:1","article-title":"End-to-end training of deep visuomotor policies","volume-title":"J. Mach. Learn. Res.","volume":"17","author":"Levine","year":"2015"},{"key":"ref5","doi-asserted-by":"crossref","DOI":"10.1109\/LRA.2022.3196123","volume-title":"What matters in language conditioned robotic imitation learning over unstructured data","author":"Mees","year":"2022"},{"year":"2023","key":"ref6","article-title":"Gpt-4 technical report"},{"journal-title":"Llama: Open and efficient foundation language models","year":"2023","author":"Touvron","key":"ref7"},{"journal-title":"Do as I can, not as I say: Grounding language in robotic affordances","year":"2022","author":"Ahn","key":"ref8"},{"journal-title":"Language models as zero-shot planners: Extracting actionable knowledge for embodied agents","year":"2022","author":"Huang","key":"ref9"},{"journal-title":"Code as policies: Language model programs for embodied control","year":"2022","author":"Liang","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.091"},{"journal-title":"Interactive language: Talking to robots in real time","year":"2022","author":"Lynch","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3180108"},{"key":"ref14","article-title":"Large language models for robotics: A survey","volume-title":"ArXiv","volume":"abs\/2311.07226","author":"Zeng","year":"2023"},{"key":"ref15","article-title":"Chatgpt for robotics: Design principles and model abilities","volume-title":"ArXiv","volume":"abs\/2306.17582","author":"Vemprala","year":"2023"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161317"},{"article-title":"Interactive task planning with language models","year":"2023","author":"Li","key":"ref17"},{"article-title":"LoRA: Low-rank adaptation of large language models","volume-title":"International Conference on Learning Representations","author":"Hu","key":"ref18"},{"key":"ref19","article-title":"Bc-z: Zero-shot task generalization with robotic imitation learning","volume-title":"ArXiv","volume":"abs\/2202.02005","author":"Jang","year":"2022"},{"article-title":"Vision-language foundation models as effective robot imitators","year":"2023","author":"Li","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2021.XVII.047"},{"key":"ref22","first-page":"3992","article-title":"Segment anything","volume-title":"2023 IEEE\/CVF International Conference on Computer Vision (ICCV)","author":"Kirillov"},{"key":"ref23","article-title":"Decision transformer: Reinforcement learning via sequence modeling","author":"Chen","year":"2021","journal-title":"Neural Information Processing Systems"},{"key":"ref24","article-title":"Offline reinforcement learning as one big sequence modeling problem","author":"Janner","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"article-title":"Masked trajectory models for prediction, representation, and control","volume-title":"International Conference on Machine Learning","author":"Wu","key":"ref25"},{"journal-title":"Masked autoencoding for scalable and generalizable decision making","year":"2023","author":"Liu","key":"ref26"},{"key":"ref27","doi-asserted-by":"crossref","DOI":"10.15607\/RSS.2024.XX.092","article-title":"Any-point trajectory modeling for policy learning","author":"Wen","year":"2024"},{"article-title":"R3m: A universal visual representation for robot manipulation","volume-title":"Conference on Robot Learning","author":"Nair","key":"ref28"},{"journal-title":"Where are we in the search for an artificial visual cortex for embodied intelligence?","year":"2023","author":"Majumdar","key":"ref29"},{"journal-title":"Learning interactive real-world simulators","year":"2024","author":"Yang","key":"ref30"},{"journal-title":"Visual instruction tuning","year":"2023","author":"Liu","key":"ref31"},{"article-title":"Minigpt-4: Enhancing vision-language understanding with advanced large language models","year":"2023","author":"Zhu","key":"ref32"},{"article-title":"Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models","volume-title":"International Conference on Machine Learning","author":"Li","key":"ref33"},{"journal-title":"An empirical study of catastrophic forgetting in large language models during continual fine-tuning","year":"2023","author":"Luo","key":"ref34"},{"article-title":"Lisa: Reasoning segmentation via large language model","year":"2023","author":"Lai","key":"ref35"},{"journal-title":"3d diffuser actor: Policy diffusion with 3d scene representations","year":"2024","author":"Ke","key":"ref36"},{"journal-title":"PaLM-E: An embodied multimodal language model","year":"2023","author":"Driess","key":"ref37"},{"journal-title":"Set-of-mark prompting unleashes extraordinary visual grounding in gpt-4v","year":"2023","author":"Yang","key":"ref38"},{"journal-title":"Vision-language foundation models as effective robot imitators","year":"2024","author":"Li","key":"ref39"},{"journal-title":"Openflamingo: An open-source framework for training large autoregressive vision-language models","year":"2023","author":"Awadalla","key":"ref40"},{"journal-title":"Learning transferable visual models from natural language supervision","year":"2021","author":"Radford","key":"ref41"}],"event":{"name":"2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2024,10,14]]},"location":"Abu Dhabi, United Arab Emirates","end":{"date-parts":[[2024,10,18]]}},"container-title":["2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10801246\/10801290\/10801683.pdf?arnumber=10801683","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,26]],"date-time":"2024-12-26T06:57:59Z","timestamp":1735196279000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10801683\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,14]]},"references-count":41,"URL":"https:\/\/doi.org\/10.1109\/iros58592.2024.10801683","relation":{},"subject":[],"published":{"date-parts":[[2024,10,14]]}}}