{"id":"https://openalex.org/W4387725599","doi":"https://doi.org/10.48550/arxiv.2310.10639","title":"Zero-Shot Robotic Manipulation with Pretrained Image-Editing Diffusion Models","display_name":"Zero-Shot Robotic Manipulation with Pretrained Image-Editing Diffusion Models","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4387725599","doi":"https://doi.org/10.48550/arxiv.2310.10639"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.10639","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2310.10639","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101664820","display_name":"Kevin Black","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Black, Kevin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012160589","display_name":"Mitsuhiko Nakamoto","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nakamoto, Mitsuhiko","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033692685","display_name":"Pranav Atreya","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Atreya, Pranav","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064208112","display_name":"Homer Walke","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Walke, Homer","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005431772","display_name":"Chelsea Finn","orcid":"https://orcid.org/0000-0001-6298-0874"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Finn, Chelsea","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102493293","display_name":"Aviral Kumar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kumar, Aviral","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5026322200","display_name":"Sergey Levine","orcid":"https://orcid.org/0000-0001-6764-2743"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Levine, Sergey","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.778623,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":93,"max":94},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9939,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9939,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9914,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.971,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.74037915},{"id":"https://openalex.org/keywords/planner","display_name":"Planner","score":0.55184567}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7879736},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.76165366},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.74037915},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.70569354},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.5976134},{"id":"https://openalex.org/C2776999362","wikidata":"https://www.wikidata.org/wiki/Q2349274","display_name":"Planner","level":2,"score":0.55184567},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47244316},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4532387},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.44327867},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.32067358},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.10639","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.10639","pdf_url":"http://arxiv.org/pdf/2310.10639","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2310.10639","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.10639","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.49,"display_name":"Peace, justice, and strong institutions"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W804484174","https://openalex.org/W56933075","https://openalex.org/W4246538999","https://openalex.org/W4244698559","https://openalex.org/W2482749251","https://openalex.org/W2378211422","https://openalex.org/W2168364913","https://openalex.org/W2002361198","https://openalex.org/W1568779110","https://openalex.org/W1548568597"],"abstract_inverted_index":{"If":[0],"generalist":[1],"robots":[2],"are":[3],"to":[4,12,15,49,103,109,130,169],"operate":[5],"in":[6,32],"truly":[7],"unstructured":[8],"environments,":[9],"they":[10],"need":[11],"be":[13,30,187],"able":[14],"recognize":[16],"and":[17,22,26,77,93,127,140,154,180],"reason":[18],"about":[19],"novel":[20],"objects":[21,25],"scenarios.":[23],"Such":[24],"scenarios":[27],"might":[28],"not":[29],"present":[31],"the":[33,89,100,112,119,132,151],"robot's":[34,90],"own":[35],"training":[36,181],"data.":[37,182],"We":[38,97,116,146],"propose":[39],"SuSIE,":[40],"a":[41,52,60,94,105],"method":[42],"that":[43,59,81,118,166,173],"leverages":[44],"an":[45],"image-editing":[46],"diffusion":[47],"model":[48],"act":[50,110],"as":[51,111],"high-level":[53,120],"planner":[54],"by":[55],"proposing":[56],"intermediate":[57],"subgoals":[58],"low-level":[61,106,114,133],"controller":[62],"can":[63,123,186],"accomplish.":[64],"Specifically,":[65],"we":[66],"finetune":[67],"InstructPix2Pix":[68],"on":[69,150,159],"video":[70],"data,":[71],"consisting":[72],"of":[73,176],"both":[74],"human":[75],"videos":[76],"robot":[78,101],"rollouts,":[79],"such":[80],"it":[82],"outputs":[83],"hypothetical":[84],"future":[85],"\"subgoal\"":[86],"observations":[87],"given":[88],"current":[91],"observation":[92],"language":[95],"command.":[96],"also":[98,155],"use":[99],"data":[102],"train":[104],"goal-conditioned":[107,134],"policy":[108],"aforementioned":[113],"controller.":[115],"find":[117],"subgoal":[121],"predictions":[122],"utilize":[124,174],"Internet-scale":[125],"pretraining":[126],"visual":[128],"understanding":[129],"guide":[131],"policy,":[135],"achieving":[136],"significantly":[137],"better":[138],"generalization":[139,158],"precision":[141],"than":[142],"conventional":[143],"language-conditioned":[144],"policies.":[145],"achieve":[147],"state-of-the-art":[148],"results":[149],"CALVIN":[152],"benchmark,":[153],"demonstrate":[156],"robust":[157],"real-world":[160],"manipulation":[161],"tasks,":[162],"beating":[163],"strong":[164],"baselines":[165],"have":[167],"access":[168],"privileged":[170],"information":[171],"or":[172],"orders":[175],"magnitude":[177],"more":[178],"compute":[179],"The":[183],"project":[184],"website":[185],"found":[188],"at":[189],"http://rail-berkeley.github.io/susie":[190],".":[191]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4387725599","counts_by_year":[{"year":2024,"cited_by_count":6}],"updated_date":"2025-01-04T14:58:58.483919","created_date":"2023-10-18"}