{"id":"https://openalex.org/W4305013041","doi":"https://doi.org/10.48550/arxiv.2210.05178","title":"Pre-Training for Robots: Offline RL Enables Learning New Tasks from a Handful of Trials","display_name":"Pre-Training for Robots: Offline RL Enables Learning New Tasks from a Handful of Trials","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4305013041","doi":"https://doi.org/10.48550/arxiv.2210.05178"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2210.05178","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2210.05178","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102493293","display_name":"Aviral Kumar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kumar, Aviral","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088818925","display_name":"Anikait Singh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Singh, Anikait","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029017885","display_name":"Frederik Ebert","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ebert, Frederik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020206254","display_name":"Yanlai Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Yanlai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005431772","display_name":"Chelsea Finn","orcid":"https://orcid.org/0000-0001-6298-0874"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Finn, Chelsea","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5026322200","display_name":"Sergey Levine","orcid":"https://orcid.org/0000-0001-6764-2743"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Levine, Sergey","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":59},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11775","display_name":"COVID-19 diagnosis using AI","score":0.9978,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7775345},{"id":"https://openalex.org/keywords/representation","display_name":"Representation","score":0.41070157}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.81189907},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7775345},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6758989},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6672033},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6669115},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.65588295},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.64200014},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.60810167},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5891398},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.41070157},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.36878946},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2210.05178","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2210.05178","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2210.05178","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W54497855","https://openalex.org/W4306904969","https://openalex.org/W3162204513","https://openalex.org/W3125814499","https://openalex.org/W3121970507","https://openalex.org/W2768698792","https://openalex.org/W217960748","https://openalex.org/W2110028391","https://openalex.org/W2090827041","https://openalex.org/W2032233321"],"abstract_inverted_index":{"Progress":[0],"in":[1,30,36,46,56,71,209,237,255,271],"deep":[2],"learning":[3,32,119,206],"highlights":[4],"the":[5,57,86,114,199,272],"tremendous":[6],"potential":[7],"of":[8,76,89,94,191,231,240,258],"utilizing":[9],"diverse":[10,68,232],"robotic":[11,31,147],"datasets":[12,25,70,148],"for":[13,26,110,116,126],"attaining":[14,27],"effective":[15,108,250],"generalization":[16,29,87],"and":[17,187,253,275],"makes":[18],"it":[19,174],"enticing":[20],"to":[21,41,53,79,137,175,184],"consider":[22],"leveraging":[23,227],"broad":[24],"robust":[28],"as":[33,157,159,219,221],"well.":[34],"However,":[35],"practice,":[37],"we":[38,61,65,99],"often":[39],"want":[40],"learn":[42,139],"a":[43,47,129,153,189,210,214,238,256],"new":[44,48,81,140,154,207,211],"skill":[45],"environment":[49],"that":[50,101,135,181,203,246],"is":[51,198],"unlikely":[52],"be":[54,106,269],"contained":[55],"prior":[58,192],"data.":[59],"Therefore":[60],"ask:":[62],"how":[63],"can":[64,105,248,268],"leverage":[66],"existing":[67,146,165,229],"offline":[69,103,133,166],"combination":[72],"with":[73,149,156,218],"small":[74],"amounts":[75,93],"task-specific":[77],"data":[78,235],"solve":[80],"tasks,":[82],"while":[83],"still":[84],"enjoying":[85],"benefits":[88],"training":[90],"on":[91,132,145,152,213],"large":[92],"data?":[95],"In":[96],"this":[97],"paper,":[98],"demonstrate":[100,245],"end-to-end":[102],"RL":[104,134,167,201],"an":[107,164,228],"approach":[109],"doing":[111],"this,":[112],"without":[113,260],"need":[115],"any":[117,262],"representation":[118],"or":[120],"vision-based":[121],"pre-training.":[122],"We":[123,243],"present":[124],"pre-training":[125,144],"robots":[127],"(PTR),":[128],"framework":[130],"based":[131],"attempts":[136],"effectively":[138,226],"tasks":[141,208],"by":[142,225],"combining":[143],"rapid":[150],"fine-tuning":[151,252],"task,":[155],"few":[158,220],"10":[160,222],"demonstrations.":[161,263],"PTR":[162,183,197,247],"utilizes":[163],"method,":[168],"conservative":[169],"Q-learning":[170],"(CQL),":[171],"but":[172],"extends":[173],"include":[176],"several":[177],"crucial":[178],"design":[179],"decisions":[180],"enable":[182,249],"actually":[185],"work":[186],"outperform":[188],"variety":[190,239],"methods.":[193],"To":[194],"our":[195],"knowledge,":[196],"first":[200],"method":[202],"succeeds":[204],"at":[205,276],"domain":[212],"real":[215],"WidowX":[216],"robot":[217,234],"task":[223],"demonstrations,":[224],"dataset":[230],"multi-task":[233],"collected":[236],"toy":[241],"kitchens.":[242],"also":[244],"autonomous":[251],"improvement":[254],"handful":[257],"trials,":[259],"needing":[261],"An":[264],"accompanying":[265],"overview":[266],"video":[267],"found":[270],"supplementary":[273],"material":[274],"thi":[277],"URL:":[278],"https://sites.google.com/view/ptr-final/":[279]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4305013041","counts_by_year":[],"updated_date":"2025-03-04T16:20:27.677291","created_date":"2022-10-14"}