{"id":"https://openalex.org/W4292945870","doi":"https://doi.org/10.48550/arxiv.2208.10458","title":"Minimax-Optimal Multi-Agent RL in Markov Games With a Generative Model","display_name":"Minimax-Optimal Multi-Agent RL in Markov Games With a Generative Model","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4292945870","doi":"https://doi.org/10.48550/arxiv.2208.10458"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2208.10458","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2208.10458","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100357017","display_name":"Gen Li","orcid":"https://orcid.org/0000-0002-0649-9493"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Gen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053809095","display_name":"Yuejie Chi","orcid":"https://orcid.org/0000-0002-6766-5459"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chi, Yuejie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005015806","display_name":"Yuting Wei","orcid":"https://orcid.org/0000-0003-1488-4647"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Yuting","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100416078","display_name":"Yuxin Chen","orcid":"https://orcid.org/0000-0001-9256-5815"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yuxin","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.709386,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":59,"max":69},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11031","display_name":"Game Theory and Applications","score":0.9939,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11031","display_name":"Game Theory and Applications","score":0.9939,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9827,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9785,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5473743},{"id":"https://openalex.org/keywords/thompson-sampling","display_name":"Thompson Sampling","score":0.4114069}],"concepts":[{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.6216198},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.61710954},{"id":"https://openalex.org/C149728462","wikidata":"https://www.wikidata.org/wiki/Q751319","display_name":"Minimax","level":2,"score":0.6157492},{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.60448533},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5473743},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.5366551},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.5047568},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.47400483},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.45219857},{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.44257438},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.42593634},{"id":"https://openalex.org/C73602740","wikidata":"https://www.wikidata.org/wiki/Q7795822","display_name":"Thompson sampling","level":3,"score":0.4114069},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.39929777},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.34645903},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.3384456},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.26326317},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.13560736}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2208.10458","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2208.10458","pdf_url":"http://arxiv.org/pdf/2208.10458","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2208.10458","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2208.10458","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4287863949","https://openalex.org/W4287102143","https://openalex.org/W3182614517","https://openalex.org/W3176022311","https://openalex.org/W3046298489","https://openalex.org/W2970347269","https://openalex.org/W2945119207","https://openalex.org/W1970303738","https://openalex.org/W1850547517","https://openalex.org/W1850488217"],"abstract_inverted_index":{"This":[0,151],"paper":[1],"studies":[2],"multi-agent":[3],"reinforcement":[4],"learning":[5,13,80,96],"in":[6,50,93,108],"Markov":[7,74,111,170],"games,":[8,75,171],"with":[9,180],"the":[10,31,34,40,47,67,90,98,126,132,138,143,148,159,184,197],"goal":[11],"of":[12,30,36,42,46,128,134,145,161,199,205],"Nash":[14,178],"equilibria":[15,19],"or":[16],"coarse":[17],"correlated":[18],"(CCE)":[20],"sample-optimally.":[21],"All":[22],"prior":[23],"results":[24],"suffer":[25],"from":[26],"at":[27],"least":[28],"one":[29],"two":[32],"obstacles:":[33],"curse":[35],"multiple":[37],"agents":[38],"and":[39,140],"barrier":[41],"long":[43],"horizon,":[44,139],"regardless":[45],"sampling":[48,65,86],"protocol":[49],"use.":[51],"We":[52],"take":[53],"a":[54,63,78,109,188],"step":[55],"towards":[56],"settling":[57],"this":[58],"problem,":[59],"assuming":[60],"access":[61],"to":[62,155,167],"flexible":[64],"mechanism:":[66],"generative":[68],"model.":[69],"Focusing":[70],"on":[71],"non-stationary":[72],"finite-horizon":[73],"we":[76,186],"develop":[77],"fast":[79],"algorithm":[81,103,173],"called":[82],"\\myalg~and":[83],"an":[84,105,176],"adaptive":[85],"scheme":[87],"that":[88,194],"leverage":[89],"optimism":[91],"principle":[92],"online":[94],"adversarial":[95],"(particularly":[97],"Follow-the-Regularized-Leader":[99],"(FTRL)":[100],"method).":[101],"Our":[102],"learns":[104],"$\\varepsilon$-approximate":[106,177],"CCE":[107],"general-sum":[110],"game":[112],"using":[113],"$$":[114,121],"\\widetilde{O}\\bigg(":[115],"\\frac{H^4":[116],"S":[117],"\\sum_{i=1}^m":[118],"A_i}{\\varepsilon^2}":[119],"\\bigg)":[120],"samples,":[122],"where":[123],"$m$":[124],"is":[125,137,152,163],"number":[127,133,144,160],"players,":[129],"$S$":[130],"indicates":[131],"states,":[135],"$H$":[136],"$A_i$":[141],"denotes":[142],"actions":[146],"for":[147,192],"$i$-th":[149],"player.":[150],"minimax-optimal":[153],"(up":[154],"log":[156],"factor)":[157],"when":[158],"players":[162],"fixed.":[164],"When":[165],"applied":[166],"two-player":[168],"zero-sum":[169],"our":[172],"provably":[174],"finds":[175],"equilibrium":[179],"minimal":[181],"samples.":[182],"Along":[183],"way,":[185],"derive":[187],"refined":[189],"regret":[190],"bound":[191],"FTRL":[193],"makes":[195],"explicit":[196],"role":[198],"variance-type":[200],"quantities,":[201],"which":[202],"might":[203],"be":[204],"independent":[206],"interest.":[207]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4292945870","counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2025-04-18T14:25:05.984266","created_date":"2022-08-24"}