{"id":"https://openalex.org/W3174364619","doi":"https://doi.org/10.15607/rss.2021.xvii.088","title":"Robust Multi-Modal Policies for Industrial Assembly via Reinforcement Learning and Demonstrations: A Large-Scale Study","display_name":"Robust Multi-Modal Policies for Industrial Assembly via Reinforcement Learning and Demonstrations: A Large-Scale Study","publication_year":2021,"publication_date":"2021-06-27","ids":{"openalex":"https://openalex.org/W3174364619","doi":"https://doi.org/10.15607/rss.2021.xvii.088","mag":"3174364619"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.15607/rss.2021.xvii.088","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://doi.org/10.15607/rss.2021.xvii.088","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037464692","display_name":"Jianlan Luo","orcid":"https://orcid.org/0009-0008-8029-7794"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jianlan Luo*","raw_affiliation_strings":["Google,,,,,"],"affiliations":[{"raw_affiliation_string":"Google,,,,,","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061512729","display_name":"Oleg Sushkov","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Oleg Sushkov*","raw_affiliation_strings":["DeepMind"],"affiliations":[{"raw_affiliation_string":"DeepMind","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063215180","display_name":"Rugile Pevceviciute","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Rugile Pevceviciute*","raw_affiliation_strings":["DeepMind"],"affiliations":[{"raw_affiliation_string":"DeepMind","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017678179","display_name":"Wenzhao Lian","orcid":"https://orcid.org/0000-0002-0995-8229"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wenzhao Lian","raw_affiliation_strings":["Google[x]"],"affiliations":[{"raw_affiliation_string":"Google[x]","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045324437","display_name":"Chang Su","orcid":"https://orcid.org/0000-0002-8704-1512"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chang Su","raw_affiliation_strings":["Google[x]"],"affiliations":[{"raw_affiliation_string":"Google[x]","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032877928","display_name":"Mel Vecer\u00edk","orcid":null},"institutions":[{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mel Vecerik","raw_affiliation_strings":["University College London, Deepmind"],"affiliations":[{"raw_affiliation_string":"University College London, Deepmind","institution_ids":["https://openalex.org/I45129253","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100718480","display_name":"Ning Ye","orcid":"https://orcid.org/0000-0001-7249-8352"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ning Ye","raw_affiliation_strings":["Google[x]"],"affiliations":[{"raw_affiliation_string":"Google[x]","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029642293","display_name":"Stefan Schaal","orcid":"https://orcid.org/0000-0001-5660-1874"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stefan Schaal","raw_affiliation_strings":["Google,,,,,"],"affiliations":[{"raw_affiliation_string":"Google,,,,,","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077984643","display_name":"Jonathan Scholz","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jonathan Scholz","raw_affiliation_strings":["DeepMind"],"affiliations":[{"raw_affiliation_string":"DeepMind","institution_ids":["https://openalex.org/I4210090411"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":6.61,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":70,"citation_normalized_percentile":{"value":0.999914,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9724,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9724,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12784","display_name":"Modular Robots and Swarm Intelligence","score":0.9669,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9595,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7477318},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.69144607},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5906745},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.58001566},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5727729},{"id":"https://openalex.org/C13736549","wikidata":"https://www.wikidata.org/wiki/Q4489420","display_name":"Industrial engineering","level":1,"score":0.32794106},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2952484},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.23673725},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.12468788},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0841341},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.15607/rss.2021.xvii.088","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2103.11512","pdf_url":"https://arxiv.org/pdf/2103.11512","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.15607/rss.2021.xvii.088","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.64,"display_name":"Industry, innovation and infrastructure"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":24,"referenced_works":["https://openalex.org/W1931877416","https://openalex.org/W2741122588","https://openalex.org/W2786928559","https://openalex.org/W2788862220","https://openalex.org/W2804380964","https://openalex.org/W2904453761","https://openalex.org/W2909718913","https://openalex.org/W2963099939","https://openalex.org/W2963411833","https://openalex.org/W2963713397","https://openalex.org/W2963940579","https://openalex.org/W2967355195","https://openalex.org/W2967727187","https://openalex.org/W2968268581","https://openalex.org/W3000439273","https://openalex.org/W3004815632","https://openalex.org/W3012148463","https://openalex.org/W3022566517","https://openalex.org/W3033324992","https://openalex.org/W3097907450","https://openalex.org/W3130984490","https://openalex.org/W4287756699","https://openalex.org/W4288363736","https://openalex.org/W4300799055"],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2920061524","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2089013912","https://openalex.org/W2076061571","https://openalex.org/W2072376847","https://openalex.org/W2038908348","https://openalex.org/W1987513656","https://openalex.org/W1977959518"],"abstract_inverted_index":{"Over":[0],"the":[1,34,40,68,78,115,122,138,185],"past":[2],"several":[3,126],"years":[4,127],"there":[5,193],"has":[6],"been":[7],"a":[8,72,91,110,151,158,161,167],"considerable":[9],"research":[10],"investment":[11],"into":[12,67,166],"learning-based":[13],"approaches":[14],"for":[15,45,60,86,197],"tasks":[16],"inspired":[17],"by":[18,32],"industrial":[19,69,112],"manufacturing,":[20],"but":[21,184],"despite":[22],"significant":[23,195],"progress,":[24],"these":[25,65,96],"techniques":[26,66],"have":[27],"yet":[28],"to":[29,95,135],"be":[30,200],"adopted":[31],"in":[33,141],"realworld.We":[35],"argue":[36],"that":[37,56,173,192],"it":[38],"is":[39,175],"prohibitively":[41],"large":[42],"design":[43,123],"space":[44],"Deep":[46],"Reinforcement":[47],"Learning":[48],"(DRL),":[49],"rather":[50],"than":[51],"algorithmic":[52],"limitations":[53],"per":[54],"se,":[55],"are":[57],"truly":[58],"responsible":[59],"this":[61,81],"lack":[62],"of":[63,98,101,109,128,143,164,177],"adoption.Pushing":[64],"mainstream":[70],"requires":[71],"paradigm":[73],"which":[74,130],"differs":[75],"significantly":[76],"from":[77,105],"academic":[79],"mindset.In":[80],"paper":[82],"we":[83,148],"define":[84],"criteria":[85,97],"industryoriented":[87],"DRL,":[88],"and":[89,146,157,191],"perform":[90],"thorough":[92],"comparison":[93],"according":[94],"one":[99],"family":[100],"learning":[102],"approaches,":[103,183],"DRL":[104,133,155,174],"demonstration,":[106],"against":[107],"results":[108],"professional":[111],"integrator":[113],"on":[114,160,202],"recently":[116],"established":[117,181],"NIST":[118],"assembly":[119],"benchmark.We":[120],"explain":[121],"choices,":[124],"representing":[125],"investigation,":[129],"enabled":[131],"our":[132,154,203],"system":[134,156,188],"consistently":[136],"outperform":[137],"integrator's":[139],"baseline":[140],"terms":[142],"both":[144],"speed":[145],"reliability.Finally,":[147],"conclude":[149],"with":[150],"competition":[152],"between":[153],"human":[159,186],"challenge":[162],"task":[163],"insertion":[165],"randomly":[168],"moving":[169],"target.This":[170],"study":[171],"suggests":[172],"capable":[176],"outperforming":[178],"not":[179],"only":[180],"engineered":[182],"motor":[187],"as":[189],"well,":[190],"remains":[194],"room":[196],"improvement.Videos":[198],"can":[199],"found":[201],"project":[204],"website:https://sites.google.com/view/shield-nist.":[205]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3174364619","counts_by_year":[{"year":2024,"cited_by_count":14},{"year":2023,"cited_by_count":45},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":2}],"updated_date":"2025-01-02T19:56:50.301764","created_date":"2021-07-05"}