{"id":"https://openalex.org/W4383109237","doi":"https://doi.org/10.1109/icra48891.2023.10160875","title":"Stackelberg Games for Learning Emergent Behaviors During Competitive Autocurricula","display_name":"Stackelberg Games for Learning Emergent Behaviors During Competitive Autocurricula","publication_year":2023,"publication_date":"2023-05-29","ids":{"openalex":"https://openalex.org/W4383109237","doi":"https://doi.org/10.1109/icra48891.2023.10160875"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48891.2023.10160875","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2305.03735","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074328657","display_name":"Boling Yang","orcid":"https://orcid.org/0000-0002-6211-122X"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Boling Yang","raw_affiliation_strings":["Paul G. Allen School of Computer Science & Engineering, University of Washington"],"affiliations":[{"raw_affiliation_string":"Paul G. Allen School of Computer Science & Engineering, University of Washington","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011436134","display_name":"Liyuan Zheng","orcid":"https://orcid.org/0000-0002-7478-8860"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Liyuan Zheng","raw_affiliation_strings":["Electrical and Computer Engineering Department, University of Washington"],"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering Department, University of Washington","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008161296","display_name":"Lillian J. Ratliff","orcid":"https://orcid.org/0000-0001-8936-0229"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lillian J. Ratliff","raw_affiliation_strings":["Electrical and Computer Engineering Department, University of Washington"],"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering Department, University of Washington","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110797782","display_name":"Byron Boots","orcid":null},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Byron Boots","raw_affiliation_strings":["Paul G. Allen School of Computer Science & Engineering, University of Washington"],"affiliations":[{"raw_affiliation_string":"Paul G. Allen School of Computer Science & Engineering, University of Washington","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019043770","display_name":"Joshua R. Smith","orcid":"https://orcid.org/0000-0002-5331-4770"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Joshua R. Smith","raw_affiliation_strings":["Electrical and Computer Engineering Department, University of Washington","Paul G. Allen School of Computer Science & Engineering, University of Washington"],"affiliations":[{"raw_affiliation_string":"Paul G. Allen School of Computer Science & Engineering, University of Washington","institution_ids":["https://openalex.org/I201448701"]},{"raw_affiliation_string":"Electrical and Computer Engineering Department, University of Washington","institution_ids":["https://openalex.org/I201448701"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.352,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.606989,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":67,"max":78},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9952,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11252","display_name":"Evolutionary Game Theory and Cooperation","score":0.9658,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stackelberg-competition","display_name":"Stackelberg competition","score":0.90198183}],"concepts":[{"id":"https://openalex.org/C199510392","wikidata":"https://www.wikidata.org/wiki/Q1184602","display_name":"Stackelberg competition","level":2,"score":0.90198183},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7847725},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7531693},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.60131925},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.49927807},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4634075},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.4520566},{"id":"https://openalex.org/C41065033","wikidata":"https://www.wikidata.org/wiki/Q2825412","display_name":"Adversary","level":2,"score":0.4486912},{"id":"https://openalex.org/C177142836","wikidata":"https://www.wikidata.org/wiki/Q44455","display_name":"Game theory","level":2,"score":0.4442304},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37788856},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.08660081},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.08405423},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48891.2023.10160875","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.03735","pdf_url":"https://arxiv.org/pdf/2305.03735","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.03735","pdf_url":"https://arxiv.org/pdf/2305.03735","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality education","score":0.47}],"grants":[{"funder":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation","award_id":"EFMA-1832795"}],"datasets":[],"versions":[],"referenced_works_count":26,"referenced_works":["https://openalex.org/W1496590343","https://openalex.org/W2342662072","https://openalex.org/W2602963933","https://openalex.org/W2603088459","https://openalex.org/W2727450595","https://openalex.org/W2772709170","https://openalex.org/W2973525135","https://openalex.org/W2982316857","https://openalex.org/W2996037775","https://openalex.org/W3017367030","https://openalex.org/W3035723062","https://openalex.org/W3046366290","https://openalex.org/W3107615218","https://openalex.org/W3110161557","https://openalex.org/W3120327542","https://openalex.org/W3133757280","https://openalex.org/W3184085787","https://openalex.org/W3203930949","https://openalex.org/W3210994662","https://openalex.org/W4236905297","https://openalex.org/W4295150809","https://openalex.org/W4297627396","https://openalex.org/W4297810554","https://openalex.org/W4298090512","https://openalex.org/W4299802797","https://openalex.org/W586722081"],"related_works":["https://openalex.org/W4383533535","https://openalex.org/W4380353856","https://openalex.org/W3169439878","https://openalex.org/W2948291551","https://openalex.org/W2605497830","https://openalex.org/W2399846249","https://openalex.org/W2375814310","https://openalex.org/W2347216446","https://openalex.org/W2062756478","https://openalex.org/W1987562536"],"abstract_inverted_index":{"Autocurricular":[0],"training":[1,30],"is":[2],"an":[3,20,68,129,177],"important":[4],"sub-area":[5],"of":[6,47,53,73,160],"multi-agent":[7],"reinforcement":[8],"learning":[9],"(MARL)":[10],"that":[11,134,172],"allows":[12],"multiple":[13],"agents":[14,74],"to":[15,70,80,143],"learn":[16],"emergent":[17],"skills":[18],"in":[19,60,77,120,148,166],"unsupervised":[21],"co-evolving":[22],"scheme.":[23],"The":[24],"robotics":[25],"community":[26],"has":[27,128],"experimented":[28],"auto-curricular":[29],"with":[31,108],"physically":[32],"grounded":[33],"problems,":[34],"such":[35],"as":[36,104,111,117],"robust":[37],"control":[38],"and":[39,114,164,169],"interactive":[40],"manipulation":[41],"tasks.":[42],"However,":[43],"the":[44,51,58,61,112,115,118,126,135,145,149,153,158],"asymmetric":[45],"nature":[46],"these":[48],"tasks":[49],"makes":[50],"generation":[52],"sophisticated":[54,168],"policies":[55],"challenging.":[56],"Indeed,":[57],"asymmetry":[59,147],"environment":[62],"may":[63],"implicitly":[64],"or":[65],"explicitly":[66],"provide":[67],"advantage":[69,137],"a":[71,81,87,100,105,121,161],"subset":[72],"which":[75,98],"could,":[76],"turn,":[78],"lead":[79],"low-quality":[82],"equilibrium.":[83],"This":[84],"paper":[85],"proposes":[86],"novel":[88],"game-theoretic":[89],"algorithm,":[90],"Stackelberg":[91,106],"Multi-Agent":[92],"Deep":[93],"Deterministic":[94],"Policy":[95],"Gradient":[96],"(ST-MADDPG),":[97],"formulates":[99],"two-player":[101],"MARL":[102],"problem":[103],"game":[107],"one":[109],"player":[110],"'leader'":[113],"other":[116],"'follower'":[119],"hierarchical":[122],"interaction":[123],"structure":[124],"wherein":[125],"leader":[127],"advantage.":[130],"We":[131],"first":[132],"demonstrate":[133],"leader's":[136,154],"from":[138],"ST-MADDPG":[139,156],"can":[140],"be":[141],"used":[142],"alleviate":[144],"inherent":[146],"environment.":[150],"By":[151],"exploiting":[152],"advantage,":[155],"improves":[157],"quality":[159],"co-evolution":[162],"process":[163],"results":[165],"more":[167],"complex":[170],"strategies":[171],"work":[173],"well":[174],"even":[175],"against":[176],"unseen":[178],"strong":[179],"opponent.":[180]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4383109237","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-01-04T16:10:38.711366","created_date":"2023-07-05"}