{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,5]],"date-time":"2025-04-05T01:53:23Z","timestamp":1743818003416,"version":"3.28.0"},"reference-count":74,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,6,16]],"date-time":"2024-06-16T00:00:00Z","timestamp":1718496000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,6,16]],"date-time":"2024-06-16T00:00:00Z","timestamp":1718496000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,6,16]]},"DOI":"10.1109\/cvpr52733.2024.01453","type":"proceedings-article","created":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T17:34:53Z","timestamp":1726508093000},"page":"15342-15353","source":"Crossref","is-referenced-by-count":3,"title":["Diffusion-ES: Gradient-Free Planning with Diffusion for Autonomous and Instruction-Guided Driving"],"prefix":"10.1109","author":[{"given":"Brian","family":"Yang","sequence":"first","affiliation":[{"name":"Carnegie Mellon University"}]},{"given":"Huangyuan","family":"Su","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}]},{"given":"Nikolaos","family":"Gkanatsios","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}]},{"given":"Tsung-Wei","family":"Ke","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}]},{"given":"Ayush","family":"Jain","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}]},{"given":"Jeff","family":"Schneider","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}]},{"given":"Katerina","family":"Fragkiadaki","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}]}],"member":"263","reference":[{"journal-title":"Is conditional generative modeling all you need for decision-making?","year":"2022","author":"Ajay","key":"ref1"},{"key":"ref2","article-title":"Chauffeurnet: Learning to drive by imitating the best and synthesizing the worst","author":"Bansal","year":"2018","journal-title":"CoRR, abs\/1812.03079"},{"journal-title":"End to end learning for self-driving cars","year":"2016","author":"Bojarski","key":"ref3"},{"journal-title":"Language models are few-shot learners","year":"2020","author":"Brown","key":"ref4"},{"journal-title":"Nuplan: A closed-loop ml-based planning benchmark for autonomous vehicles","year":"2022","author":"Caesar","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10342382"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01671"},{"journal-title":"Learning by cheating","year":"2019","author":"Chen","key":"ref8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.026"},{"key":"ref10","article-title":"End-to-end driving via conditional imitation learning","author":"Codevilla","year":"2017","journal-title":"CoRR, abs\/1710.02410"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01580"},{"journal-title":"Parting with misconceptions about learning-based vehicle motion planning","year":"2023","author":"Dauner","key":"ref12"},{"volume-title":"Diffusion models beat gans on image synthesis","year":"2021","author":"Dhariwal","key":"ref13"},{"journal-title":"Reduce, reuse, recycle: Compositional generation with energy-based diffusion models and mcmc","year":"2023","author":"Du","key":"ref14"},{"journal-title":"Learning universal policies via text-guided video generation","year":"2023","author":"Du","key":"ref15"},{"journal-title":"Baidu apollo em motion planner","year":"2018","author":"Fan","key":"ref16"},{"journal-title":"Motion prediction under multi-modality with conditional stochastic networks","year":"2017","author":"Fragkiadaki","key":"ref17"},{"key":"ref18","article-title":"Energy-based Models are Zero-Shot Planners for Compositional Scene Rearrangement","author":"Gkanatsios","year":"2023","journal-title":"Robotics: Science and Systems"},{"key":"ref19","first-page":"14953","article-title":"Visual pro-gramming: Compositional visual reasoning without training","volume-title":"Proceedings of the IEEEICVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Gupta"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.58.133"},{"key":"ref21","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"journal-title":"Gaia-I: A generative world model for autonomous driving","year":"2023","author":"Hu","key":"ref22"},{"journal-title":"Language models as zero-shot planners: Extracting actionable knowledge for embodied agents","year":"2022","author":"Huang","key":"ref23"},{"journal-title":"Inner monologue: Embodied reasoning through planning with language models","year":"2022","author":"Huang","key":"ref24"},{"journal-title":"Voxposer: Composable 3d value maps for robotic manipulation with language models","year":"2023","author":"Huang","key":"ref25"},{"article-title":"Planning with diffusion for flexible behavior synthesis","volume-title":"International Conference on Machine Learning","author":"Janner","key":"ref26"},{"key":"ref27","first-page":"9644","article-title":"Motion-diffuser: Controllable multi agent motion prediction using diffusion","volume-title":"Proceedings of the IEEEICVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Jiang"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00930"},{"journal-title":"Gen2sim: Scaling up robot learning in simulation with generative models","year":"2023","author":"Katara","key":"ref29"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3231525"},{"key":"ref31","article-title":"Imitating driver be-havior with generative adversarial networks","author":"Kuefler","year":"2017","journal-title":"CoRR, abs\/1701.06699"},{"journal-title":"Code as policies: Language model programs for em-bodied control","year":"2022","author":"Liang","key":"ref32"},{"key":"ref33","article-title":"Adaptdiffuser: Diffusion models as adaptive self-evolving planners","author":"Liang","year":"2023","journal-title":"ICML"},{"key":"ref34","article-title":"Pre-train, prompt, and predict: A systematic survey of prompting methods in natural language processing","author":"Liu","year":"2021","journal-title":"CoRR"},{"journal-title":"Contrastive energy prediction for exact energy-guided diffusion sampling in offline reinforcement learning","year":"2023","author":"Lu","key":"ref35"},{"journal-title":"Gpt-driver: Learning to drive with gpt","year":"2023","author":"Mao","key":"ref36"},{"issue":"9","key":"ref37","first-page":"468","article-title":"The stanford entry in the urban challenge","volume":"7","author":"Montremerlo","year":"2008","journal-title":"Journal of Field Robotics"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/icra48891.2023.10160609"},{"key":"ref39","first-page":"8162","article-title":"Improved denoising diffusion probabilistic models","volume-title":"International Conference on Machine Learning","author":"Quinn Nichol"},{"journal-title":"Imitating human behaviour with diffusion models","year":"2023","author":"Pearce","key":"ref40"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2017.7995816"},{"key":"ref42","first-page":"305","article-title":"ALVINN: an autonomous land vehicle in a neural network","volume-title":"Advances in Neural Information Processing Systems 1","author":"Pomerleau","year":"1989"},{"key":"ref43","article-title":"Multi-modal fusion transformer for end-to-end au-tonomous driving","author":"Prakash","year":"2021","journal-title":"CoRR, abs\/2104.09224"},{"journal-title":"Hierarchical text-conditional image generation with clip latents","year":"2022","author":"Ramesh","key":"ref44"},{"article-title":"Plant: Explainable planning transformers via object-level representations","volume-title":"6th Annual Conference on Robot Learning","author":"Renz","key":"ref45"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.028"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561683"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1016\/S0377-2217(96)00385-2"},{"journal-title":"Pho-torealistic text-to-image diffusion models with deep language understanding","year":"2022","author":"Saharia","key":"ref50"},{"article-title":"Urban driver: Learning to drive from real-world demonstrations using policy gradients","volume-title":"Conference on Robot Learning","author":"Scheel","key":"ref51"},{"key":"ref52","first-page":"718","article-title":"Urban driver: Learning to drive from real-world demonstrations using policy gradients","volume-title":"Conference on Robot Learning","author":"Scheel"},{"journal-title":"Languagempc: Large language models as decision makers for autonomous driving","year":"2023","author":"Sha","key":"ref53"},{"journal-title":"Behavior transformers: Cloning k modes with one stone","year":"2022","author":"Muhammad","key":"ref54"},{"journal-title":"Deep unsupervised learning using nonequilibrium thermodynamics","year":"2015","author":"Narain Sohl-Dickstein","key":"ref55"},{"journal-title":"Denoising diffusion implicit models","year":"2020","author":"Song","key":"ref56"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01092"},{"key":"ref58","article-title":"Mul-tiple futures prediction","author":"Charlie Tang","year":"2019","journal-title":"CoRR, abs\/1911.00997"},{"key":"ref59","article-title":"Se (3)-diffusionfields: Learning cost functions for joint grasp and motion optimization through diffusion","author":"Urain","year":"2023","journal-title":"ICRA"},{"journal-title":"Diffusion policies as an expressive policy class for offline reinforcement learning","year":"2022","author":"Wang","key":"ref60"},{"key":"ref61","article-title":"Chain of thought prompting elicits reasoning in large language models","author":"Wei","year":"2022","journal-title":"CoRR, abs\/2201.11903"},{"journal-title":"On the road with gpt-4v (ision): Early explorations of visual-language model on autonomous driving","year":"2023","author":"Wen","key":"ref62"},{"journal-title":"Model predictive path integral control using covariance variable importance sampling","year":"2015","author":"Williams","key":"ref63"},{"key":"ref64","article-title":"Chained-diffuser: Unifying trajectory diffusion and keypose prediction for robotic manipulation","author":"Xian","year":"2023","journal-title":"CoRL"},{"key":"ref65","article-title":"Regression planning networks","author":"Xu","year":"2019","journal-title":"CoRR, abs\/1909.13072"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/lra.2024.3440097"},{"journal-title":"Learning interactive real-world simulators","year":"2023","author":"Yang","key":"ref67"},{"volume-title":"Language to rewards for robotic skill synthesis","year":"2023","author":"Yu","key":"ref68"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00886"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01494"},{"journal-title":"Guided conditional diffusion for controllable traffic simulation","year":"2022","author":"Zhong","key":"ref71"},{"journal-title":"Guided conditional diffusion for controllable traffic simulation","year":"2022","author":"Zhong","key":"ref72"},{"key":"ref73","article-title":"Hierarchical planning for long-horizon ma-nipulation with geometric and symbolic scene graphs","author":"Zhu","year":"2020","journal-title":"CoRR, abs\/2012.07277"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2014.6856581"}],"event":{"name":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","start":{"date-parts":[[2024,6,16]]},"location":"Seattle, WA, USA","end":{"date-parts":[[2024,6,22]]}},"container-title":["2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10654794\/10654797\/10657485.pdf?arnumber=10657485","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,21]],"date-time":"2024-09-21T05:53:00Z","timestamp":1726897980000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10657485\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,16]]},"references-count":74,"URL":"https:\/\/doi.org\/10.1109\/cvpr52733.2024.01453","relation":{},"subject":[],"published":{"date-parts":[[2024,6,16]]}}}