{"id":"https://openalex.org/W2774742309","doi":"https://doi.org/10.1109/iros.2017.8205959","title":"Policy transfer via modularity and reward guiding","display_name":"Policy transfer via modularity and reward guiding","publication_year":2017,"publication_date":"2017-09-01","ids":{"openalex":"https://openalex.org/W2774742309","doi":"https://doi.org/10.1109/iros.2017.8205959","mag":"2774742309"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2017.8205959","pdf_url":null,"source":{"id":"https://openalex.org/S4363607734","display_name":"2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073533020","display_name":"Ignasi Clavera","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ignasi Clavera","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, UC Berkeley"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, UC Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037048516","display_name":"David Held","orcid":"https://orcid.org/0000-0003-0537-1508"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Held","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, UC Berkeley"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, UC Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049349154","display_name":"Pieter Abbeel","orcid":null},"institutions":[{"id":"https://openalex.org/I4210161460","display_name":"OpenAI (United States)","ror":"https://ror.org/05wx9n238","country_code":"US","type":"company","lineage":["https://openalex.org/I4210161460"]},{"id":"https://openalex.org/I1297971548","display_name":"International Computer Science Institute","ror":"https://ror.org/01ewh7m12","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1297971548"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pieter Abbeel","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, UC Berkeley","International Computer Science Institute (ICSI)","OpenAI"],"affiliations":[{"raw_affiliation_string":"OpenAI","institution_ids":["https://openalex.org/I4210161460"]},{"raw_affiliation_string":"International Computer Science Institute (ICSI)","institution_ids":["https://openalex.org/I1297971548"]},{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, UC Berkeley","institution_ids":["https://openalex.org/I95457486"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":6.469,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":38,"citation_normalized_percentile":{"value":0.959982,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1537","last_page":"1544"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":1.0,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":1.0,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10784","display_name":"Muscle activation and electromyography studies","score":0.9972,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modularity","display_name":"Modularity (biology)","score":0.7043808},{"id":"https://openalex.org/keywords/simulation-to-real-world-transfer","display_name":"Simulation to Real-world Transfer","score":0.576007},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement Learning","score":0.566219},{"id":"https://openalex.org/keywords/robot-learning","display_name":"Robot Learning","score":0.530012},{"id":"https://openalex.org/keywords/human-robot-collaboration","display_name":"Human-Robot Collaboration","score":0.508187},{"id":"https://openalex.org/keywords/prehensile-tail","display_name":"Prehensile tail","score":0.47183898}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.77235854},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.76071435},{"id":"https://openalex.org/C2779478453","wikidata":"https://www.wikidata.org/wiki/Q6889748","display_name":"Modularity (biology)","level":2,"score":0.7043808},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.678185},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6441873},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.56004214},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.55903405},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5449686},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.47222427},{"id":"https://openalex.org/C136380597","wikidata":"https://www.wikidata.org/wiki/Q10508905","display_name":"Prehensile tail","level":2,"score":0.47183898},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34890336},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10936141},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2017.8205959","pdf_url":null,"source":{"id":"https://openalex.org/S4363607734","display_name":"2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":45,"referenced_works":["https://openalex.org/W1191599655","https://openalex.org/W132147841","https://openalex.org/W143499627","https://openalex.org/W1501226982","https://openalex.org/W1608593143","https://openalex.org/W1674160379","https://openalex.org/W1771410628","https://openalex.org/W1777239053","https://openalex.org/W1949568868","https://openalex.org/W1973600975","https://openalex.org/W2029342956","https://openalex.org/W2050708324","https://openalex.org/W2051453180","https://openalex.org/W2060554619","https://openalex.org/W2099951169","https://openalex.org/W2108682071","https://openalex.org/W2109447584","https://openalex.org/W2124695578","https://openalex.org/W2125612430","https://openalex.org/W2128082316","https://openalex.org/W2135830364","https://openalex.org/W2140462634","https://openalex.org/W2149175962","https://openalex.org/W2158782408","https://openalex.org/W2159600763","https://openalex.org/W2173248099","https://openalex.org/W2342662072","https://openalex.org/W2384495648","https://openalex.org/W2396274844","https://openalex.org/W2416477367","https://openalex.org/W2418627435","https://openalex.org/W2443711627","https://openalex.org/W2461838149","https://openalex.org/W2472269674","https://openalex.org/W2529601334","https://openalex.org/W2530944449","https://openalex.org/W2564480083","https://openalex.org/W2952629144","https://openalex.org/W2963030226","https://openalex.org/W2963641140","https://openalex.org/W2963864421","https://openalex.org/W2963892386","https://openalex.org/W2964043796","https://openalex.org/W2964161785","https://openalex.org/W590039119"],"related_works":["https://openalex.org/W3166169123","https://openalex.org/W2998125923","https://openalex.org/W2768832826","https://openalex.org/W2552641899","https://openalex.org/W2465876097","https://openalex.org/W2105329304","https://openalex.org/W2028231052","https://openalex.org/W2010045274","https://openalex.org/W197901881","https://openalex.org/W1551001629"],"abstract_inverted_index":{"Non-prehensile":[0],"manipulation,":[1],"such":[2,64,176],"as":[3,18],"pushing,":[4],"is":[5,15],"an":[6,19,52,177,185],"important":[7],"function":[8,146,160],"for":[9,36],"robots":[10],"to":[11,21,25,45,49,56,79,85,93,96,147,156,182],"move":[12],"objects":[13],"and":[14,74,105,117,143,161,172,191],"sometimes":[16],"preferred":[17],"alternative":[20],"grasping.":[22],"However,":[23],"due":[24],"unknown":[26],"frictional":[27],"forces,":[28],"pushing":[29,70],"has":[30],"been":[31],"proven":[32],"a":[33,47,65,120],"difficult":[34],"task":[35,138],"robots.":[37],"We":[38,127,167],"explore":[39],"the":[40,59,69,80,87,98,102,137,140,144,158,164],"use":[41,94],"of":[42,62,122],"reinforcement":[43],"learning":[44],"train":[46,68,118],"robot":[48],"robustly":[50],"push":[51,184],"object.":[53],"In":[54,83],"order":[55,84],"deal":[57],"with":[58],"sample":[60],"complexity":[61],"training":[63,109,165],"method,":[66],"we":[67,91,111,131,152],"policy":[71,78,100],"in":[72,125,169],"simulation":[73,171],"then":[75],"transfer":[76,88],"this":[77],"real":[81],"world.":[82],"ease":[86],"from":[89,101,187],"simulation,":[90],"propose":[92],"modularity":[95],"separate":[97],"learned":[99],"raw":[103],"inputs":[104],"outputs;":[106],"rather":[107],"than":[108],"\"end-to-end,\"":[110],"decompose":[112],"our":[113],"system":[114],"into":[115,139],"modules":[116,124],"only":[119],"subset":[121],"these":[123],"simulation.":[126],"further":[128,162],"demonstrate":[129],"that":[130,175],"can":[132,179],"incorporate":[133],"prior":[134],"knowledge":[135],"about":[136],"state":[141],"space":[142],"reward":[145,159],"speed":[148],"up":[149],"convergence.":[150],"Finally,":[151],"introduce":[153],"\"reward":[154],"guiding\"":[155],"modify":[157],"reduce":[163],"time.":[166],"demonstrate,":[168],"both":[170],"real-world":[173],"experiments,":[174],"approach":[178],"be":[180],"used":[181],"reliably":[183],"object":[186],"many":[188],"initial":[189],"positions":[190],"orientations.":[192],"Videos":[193],"available":[194],"at":[195],"https://goo.gl/B7LtY3.":[196]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2774742309","counts_by_year":[{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":8},{"year":2018,"cited_by_count":1}],"updated_date":"2024-12-05T04:10:43.119656","created_date":"2017-12-22"}