{"id":"https://openalex.org/W3206827162","doi":"https://doi.org/10.1109/icra46639.2022.9812312","title":"Offline Meta-Reinforcement Learning for Industrial Insertion","display_name":"Offline Meta-Reinforcement Learning for Industrial Insertion","publication_year":2022,"publication_date":"2022-05-23","ids":{"openalex":"https://openalex.org/W3206827162","doi":"https://doi.org/10.1109/icra46639.2022.9812312","mag":"3206827162"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra46639.2022.9812312","pdf_url":null,"source":{"id":"https://openalex.org/S4363607759","display_name":"2022 International Conference on Robotics and Automation (ICRA)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2110.04276","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041306134","display_name":"Tony Z. Zhao","orcid":null},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tony Z. Zhao","raw_affiliation_strings":["Work done as an intern at X, The Moonshot Factory, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Work done as an intern at X, The Moonshot Factory, Mountain View, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037464692","display_name":"Jianlan Luo","orcid":"https://orcid.org/0009-0008-8029-7794"},"institutions":[{"id":"https://openalex.org/I4210105824","display_name":"Intrinsic LifeSciences (United States)","ror":"https://ror.org/01maah330","country_code":"US","type":"company","lineage":["https://openalex.org/I4210105824"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jianlan Luo","raw_affiliation_strings":["Intrinsic Innovation LLC, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Intrinsic Innovation LLC, Mountain View, CA, USA","institution_ids":["https://openalex.org/I4210105824"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061512729","display_name":"Oleg Sushkov","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Oleg Sushkov","raw_affiliation_strings":["Deepmind, London, UK"],"affiliations":[{"raw_affiliation_string":"Deepmind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063215180","display_name":"Rugile Pevceviciute","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Rugile Pevceviciute","raw_affiliation_strings":["Deepmind, London, UK"],"affiliations":[{"raw_affiliation_string":"Deepmind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062951341","display_name":"Nicolas Heess","orcid":"https://orcid.org/0000-0001-7876-9256"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Nicolas Heess","raw_affiliation_strings":["Deepmind, London, UK"],"affiliations":[{"raw_affiliation_string":"Deepmind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014848474","display_name":"Jon Scholz","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jon Scholz","raw_affiliation_strings":["Deepmind, London, UK"],"affiliations":[{"raw_affiliation_string":"Deepmind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029642293","display_name":"Stefan Schaal","orcid":"https://orcid.org/0000-0001-5660-1874"},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stefan Schaal","raw_affiliation_strings":["Work done as an intern at X, The Moonshot Factory, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Work done as an intern at X, The Moonshot Factory, Mountain View, CA, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026322200","display_name":"Sergey Levine","orcid":"https://orcid.org/0000-0001-6764-2743"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sergey Levine","raw_affiliation_strings":["Department of Electric Engineering and Computer Science, University of California, Berkeley, Berkeley, CA, USA","Google Brain, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Brain, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Department of Electric Engineering and Computer Science, University of California, Berkeley, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":22.776,"has_fulltext":false,"cited_by_count":38,"citation_normalized_percentile":{"value":0.999795,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"6386","last_page":"6393"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9962,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9962,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.978,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12111","display_name":"Industrial Vision Systems and Defect Detection","score":0.9768,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8664259},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.85186017},{"id":"https://openalex.org/C2781002164","wikidata":"https://www.wikidata.org/wiki/Q6822311","display_name":"Meta learning (computer science)","level":3,"score":0.7905271},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.6697176},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.63432014},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5760455},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.55460566},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5447702},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.49993396},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33775067},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra46639.2022.9812312","pdf_url":null,"source":{"id":"https://openalex.org/S4363607759","display_name":"2022 International Conference on Robotics and Automation (ICRA)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2110.04276","pdf_url":"https://arxiv.org/pdf/2110.04276","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2110.04276","pdf_url":"https://arxiv.org/pdf/2110.04276","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.53,"display_name":"Industry, innovation and infrastructure"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":55,"referenced_works":["https://openalex.org/W1938885652","https://openalex.org/W2008731016","https://openalex.org/W2034052310","https://openalex.org/W2064557786","https://openalex.org/W2110697446","https://openalex.org/W2111067802","https://openalex.org/W2115617434","https://openalex.org/W2126909264","https://openalex.org/W2155007355","https://openalex.org/W2539574638","https://openalex.org/W2553722312","https://openalex.org/W2578206533","https://openalex.org/W2604763608","https://openalex.org/W2741122588","https://openalex.org/W2755546070","https://openalex.org/W2781726626","https://openalex.org/W2788904251","https://openalex.org/W2794757725","https://openalex.org/W2883403478","https://openalex.org/W2923504512","https://openalex.org/W2952526277","https://openalex.org/W2962732055","https://openalex.org/W2962732398","https://openalex.org/W2963165111","https://openalex.org/W2963176272","https://openalex.org/W2963403593","https://openalex.org/W2963411833","https://openalex.org/W2963864421","https://openalex.org/W2963940579","https://openalex.org/W2964093801","https://openalex.org/W2964161785","https://openalex.org/W2964333597","https://openalex.org/W2967355195","https://openalex.org/W2967727187","https://openalex.org/W2968268581","https://openalex.org/W3012148463","https://openalex.org/W3022566517","https://openalex.org/W3033324992","https://openalex.org/W3034786558","https://openalex.org/W3047193571","https://openalex.org/W3090369311","https://openalex.org/W3097907450","https://openalex.org/W3125760305","https://openalex.org/W3130717831","https://openalex.org/W3130984490","https://openalex.org/W3159735414","https://openalex.org/W3169929896","https://openalex.org/W3172360140","https://openalex.org/W3174364619","https://openalex.org/W3178748050","https://openalex.org/W3210319071","https://openalex.org/W4287082344","https://openalex.org/W4287689437","https://openalex.org/W4287692003","https://openalex.org/W4287756699"],"related_works":["https://openalex.org/W54497855","https://openalex.org/W4383109125","https://openalex.org/W4294873804","https://openalex.org/W3130669838","https://openalex.org/W3121970507","https://openalex.org/W2891227010","https://openalex.org/W2785397462","https://openalex.org/W217960748","https://openalex.org/W2110028391","https://openalex.org/W2032233321"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,259],"(RL)":[2],"can":[3,86,116,155],"in":[4,79,125,177,206],"principle":[5],"let":[6],"robots":[7],"automatically":[8],"adapt":[9,48,236],"to":[10,23,33,45,47,146,157,159,203,234,237],"new":[11,34,160,199],"tasks,":[12,142,243],"but":[13],"current":[14],"RL":[15,129],"methods":[16,154],"require":[17,108],"a":[18,50,174,238,245,252],"large":[19,63],"number":[20,64],"of":[21,39,65,139,240,248,254],"trials":[22,67,136],"accomplish":[24],"this.":[25],"In":[26,93],"this":[27,94,115,188],"paper,":[28],"we":[29,96],"tackle":[30],"rapid":[31],"adaptation":[32,58,75],"tasks":[35,44,161,261],"through":[36,225],"the":[37,140,144,198,207,211,255,260],"framework":[38],"meta-learning,":[40],"which":[41,172],"utilizes":[42],"past":[43],"learn":[46],"with":[49,119,193,244],"specific":[51,99],"focus":[52],"on":[53],"industrial":[54,178],"insertion":[55,84,242],"tasks.":[56],"Fast":[57],"is":[59,76,201,219,232],"crucial":[60],"because":[61],"prohibitively":[62],"on-robot":[66],"will":[68],"potentially":[69],"damage":[70],"hardware":[71],"pieces.":[72],"Additionally,":[73],"effective":[74],"also":[77],"feasible":[78],"that":[80,114,131,162,229],"experience":[81],"among":[82],"different":[83,165,241],"applications":[85],"be":[87,117],"largely":[88],"leveraged":[89],"by":[90,189],"each":[91,138],"other.":[92],"setting,":[95],"address":[97,187],"two":[98],"challenges":[100],"when":[101],"applying":[102],"meta-learning.":[103],"First,":[104],"conventional":[105],"meta-RL":[106,149,153],"algorithms":[107],"lengthy":[109],"online":[110,195],"meta-training.":[111],"We":[112,186,227],"show":[113,228],"replaced":[118],"appropriately":[120],"chosen":[121],"offline":[122,127],"data,":[123,209],"resulting":[124],"an":[126],"meta-":[128],"method":[130],"only":[132,251],"requires":[133],"demonstrations":[134],"and":[135,216,266],"from":[137,166,262],"prior":[141,208],"without":[143],"need":[145],"run":[147],"costly":[148],"procedures":[150],"online.":[151],"Second,":[152],"fail":[156],"generalize":[158],"are":[163,184,268],"too":[164,220],"those":[167,204],"seen":[168,205],"at":[169,270],"meta-training":[170],"time,":[171],"poses":[173],"particular":[175],"challenge":[176],"applications,":[179],"where":[180],"high":[181],"success":[182,246],"rates":[183],"critical.":[185],"combining":[190],"contextual":[191,212],"meta-learning":[192],"direct":[194],"finetuning:":[196],"if":[197,217],"task":[200],"similar":[202],"then":[210],"meta-learner":[213],"adapts":[214,224],"immediately,":[215],"it":[218,222],"different,":[221],"gradually":[223],"finetuning.":[226],"our":[230],"approach":[231],"able":[233],"quickly":[235],"variety":[239],"rate":[247],"100%":[249],"using":[250],"fraction":[253],"samples":[256],"needed":[257],"for":[258],"scratch.":[263],"Experiment":[264],"videos":[265],"details":[267],"available":[269],"//sites.google.com/view/offline-metarl-insertion.https:":[271]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3206827162","counts_by_year":[{"year":2024,"cited_by_count":16},{"year":2023,"cited_by_count":18},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1}],"updated_date":"2025-01-05T12:12:41.013087","created_date":"2021-10-25"}