{"id":"https://openalex.org/W4296932844","doi":"https://doi.org/10.48550/arxiv.2209.10656","title":"Learning from Symmetry: Meta-Reinforcement Learning with Symmetrical Behaviors and Language Instructions","display_name":"Learning from Symmetry: Meta-Reinforcement Learning with Symmetrical Behaviors and Language Instructions","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4296932844","doi":"https://doi.org/10.48550/arxiv.2209.10656"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2209.10656","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2209.10656","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073985865","display_name":"Xiangtong Yao","orcid":"https://orcid.org/0000-0003-2556-3072"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Xiangtong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060444894","display_name":"Zhenshan Bing","orcid":"https://orcid.org/0000-0002-0896-2517"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bing, Zhenshan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072201303","display_name":"Genghang Zhuang","orcid":"https://orcid.org/0000-0003-2478-7912"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhuang, Genghang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016558517","display_name":"Kejia Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Kejia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103182284","display_name":"Hongkuan Zhou","orcid":"https://orcid.org/0000-0002-1329-2626"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Hongkuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100768452","display_name":"Kai Huang","orcid":"https://orcid.org/0000-0003-0359-7810"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Kai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5063781430","display_name":"Alois Knoll","orcid":"https://orcid.org/0000-0003-4840-076X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Knoll, Alois","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.60916,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":59,"max":69},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9897,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9897,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9379,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.86731875},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8326025},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7619227},{"id":"https://openalex.org/C2781002164","wikidata":"https://www.wikidata.org/wiki/Q6822311","display_name":"Meta learning (computer science)","level":3,"score":0.71939766},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.64700234},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.56773955},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5293266},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.51856524},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.42637527},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33149707},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.11879808},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07634598},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2209.10656","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.10656","pdf_url":"http://arxiv.org/pdf/2209.10656","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2209.10656","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2209.10656","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4383109125","https://openalex.org/W4310083477","https://openalex.org/W4294873804","https://openalex.org/W4283332751","https://openalex.org/W3130669838","https://openalex.org/W2920061524","https://openalex.org/W2891227010","https://openalex.org/W2501594388","https://openalex.org/W2328553770","https://openalex.org/W1977959518"],"abstract_inverted_index":{"Meta-reinforcement":[0],"learning":[1,63,82,89,93,125],"(meta-RL)":[2],"is":[3],"a":[4,86],"promising":[5],"approach":[6],"that":[7,91,116],"enables":[8,92],"the":[9,28,39,47,78,122],"agent":[10],"to":[11,27],"learn":[12],"new":[13,65,94],"tasks":[14,95],"quickly.":[15],"However,":[16],"most":[17],"meta-RL":[18,37,74],"algorithms":[19],"show":[20,115],"poor":[21],"generalization":[22,40,80,123],"in":[23,107],"multi-task":[24],"scenarios":[25],"due":[26],"insufficient":[29],"task":[30],"information":[31],"provided":[32],"only":[33],"by":[34,42],"rewards.":[35],"Language-conditioned":[36],"improves":[38],"capability":[41],"matching":[43],"language":[44,54,71,101],"instructions":[45,55,72],"with":[46,97],"agent's":[48],"behaviors.":[49],"While":[50],"both":[51],"behaviors":[52,99],"and":[53,70,81,100,112,124],"have":[56],"symmetry,":[57],"which":[58],"can":[59,75,119],"speed":[60],"up":[61],"human":[62],"of":[64,127],"knowledge.":[66],"Thus,":[67],"combining":[68],"symmetry":[69],"into":[73],"help":[76],"improve":[77,121],"algorithm's":[79],"efficiency.":[83],"We":[84,103],"propose":[85],"dual-MDP":[87],"meta-reinforcement":[88,128],"method":[90,106,118],"efficiently":[96],"symmetrical":[98],"instructions.":[102],"evaluate":[104],"our":[105,117],"multiple":[108],"challenging":[109],"manipulation":[110],"tasks,":[111],"experimental":[113],"results":[114],"greatly":[120],"efficiency":[126],"learning.":[129],"Videos":[130],"are":[131],"available":[132],"at":[133],"https://tumi6robot.wixsite.com/symmetry/.":[134]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4296932844","counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2025-04-16T20:46:23.123858","created_date":"2022-09-24"}