{"id":"https://openalex.org/W4403346637","doi":"https://doi.org/10.48550/arxiv.2410.05673","title":"Learning Equilibria in Adversarial Team Markov Games: A\n Nonconvex-Hidden-Concave Min-Max Optimization Problem","display_name":"Learning Equilibria in Adversarial Team Markov Games: A\n Nonconvex-Hidden-Concave Min-Max Optimization Problem","publication_year":2024,"publication_date":"2024-10-08","ids":{"openalex":"https://openalex.org/W4403346637","doi":"https://doi.org/10.48550/arxiv.2410.05673"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.05673","pdf_url":"http://arxiv.org/pdf/2410.05673","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2410.05673","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069416974","display_name":"Fivos Kalogiannis","orcid":"https://orcid.org/0009-0002-5235-6116"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kalogiannis, Fivos","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100716663","display_name":"Jie Yan","orcid":"https://orcid.org/0000-0003-2040-5964"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Jingming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5083207837","display_name":"Ioannis Panageas","orcid":"https://orcid.org/0000-0003-0577-4147"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Panageas, Ioannis","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9667,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9667,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10328","display_name":"Supply Chain and Inventory Management","score":0.9584,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11031","display_name":"Game Theory and Applications","score":0.9476,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.6461875},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.54485434},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.53870404},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.49812388},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.48670104},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.4607097},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.29502675},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.28803647},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.27703077},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.16233867},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.10884464}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.05673","pdf_url":"http://arxiv.org/pdf/2410.05673","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.05673","pdf_url":"http://arxiv.org/pdf/2410.05673","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4310988119","https://openalex.org/W4297672492","https://openalex.org/W4288019534","https://openalex.org/W4285226279","https://openalex.org/W4246396837","https://openalex.org/W3191453585","https://openalex.org/W3126451824","https://openalex.org/W2502115930","https://openalex.org/W2482350142","https://openalex.org/W1561927205"],"abstract_inverted_index":{"We":[0,97,210,243],"study":[1],"the":[2,47,86,117,122,126,129,133,146,168,191,212,220,224,234,257],"problem":[3],"of":[4,85,125,132,148,170,193,223,236,259],"learning":[5,20,100,149],"a":[6,15,38,44,62,99,174,180,228,237],"Nash":[7],"equilibrium":[8,77],"(NE)":[9],"in":[10,17,33,61,116,176,198],"Markov":[11,30,54,57,156],"games":[12,31,50,55,161,178],"which":[13,34,82],"is":[14,114,141,203],"cornerstone":[16],"multi-agent":[18],"reinforcement":[19],"(MARL).":[21],"In":[22],"particular,":[23],"we":[24],"focus":[25],"on":[26],"infinite-horizon":[27],"adversarial":[28],"team":[29],"(ATMGs)":[32],"agents":[35],"that":[36,64,102,113,144,218],"share":[37],"common":[39],"reward":[40,87],"function":[41,226],"compete":[42],"against":[43],"single":[45],"opponent,":[46],"adversary.":[48],"These":[49],"unify":[51],"two-player":[52,157],"zero-sum":[53,158],"and":[56,68,88,91,110,121,159],"potential":[58,160],"games,":[59],"resulting":[60],"setting":[63],"encompasses":[65],"both":[66],"collaboration":[67],"competition.":[69],"Kalogiannis":[70],"et":[71,137,207,261],"al.":[72],"(2023a)":[73],"provided":[74],"an":[75],"efficient":[76],"computation":[78],"algorithm":[79,101],"for":[80,151,155,164,251],"ATMGs":[81],"presumes":[83],"knowledge":[84],"transition":[89],"functions":[90],"has":[92,186],"no":[93],"sample":[94,111],"complexity":[95,112],"guarantees.":[96],"contribute":[98],"utilizes":[103],"MARL":[104],"policy":[105],"gradient":[106],"methods":[107],"with":[108,240],"iteration":[109],"polynomial":[115],"approximation":[118],"error":[119],"$\\epsilon$":[120],"natural":[123],"parameters":[124],"ATMG,":[127],"resolving":[128],"main":[130],"caveats":[131],"solution":[134],"by":[135,215,247],"(Kalogiannis":[136],"al.,":[138,208,262],"2023a).":[139],"It":[140],"worth":[142],"noting":[143],"previously,":[145],"existence":[147],"algorithms":[150],"NE":[152,175],"was":[153],"known":[154],"but":[162],"not":[163],"ATMGs.":[165],"Seen":[166],"through":[167],"lens":[169],"min-max":[171],"optimization,":[172],"computing":[173],"these":[177,245],"consists":[179],"nonconvex-nonconcave":[181,194],"saddle-point":[182],"problem.":[183],"Min-max":[184],"optimization":[185],"received":[187],"extensive":[188],"study.":[189],"Nevertheless,":[190],"case":[192],"landscapes":[195],"remains":[196],"elusive:":[197],"full":[199],"generality,":[200],"finding":[201],"saddle-points":[202],"computationally":[204],"intractable":[205],"(Daskalakis":[206],"2021).":[209],"circumvent":[211],"aforementioned":[213],"intractability":[214],"developing":[216],"techniques":[217,250],"exploit":[219],"hidden":[221],"structure":[222],"objective":[225],"via":[227],"nonconvex-concave":[229],"reformulation.":[230],"However,":[231],"this":[232],"introduces":[233],"challenge":[235],"feasibility":[238],"set":[239],"coupled":[241],"constraints.":[242],"tackle":[244],"challenges":[246],"establishing":[248],"novel":[249],"optimizing":[252],"weakly-smooth":[253],"nonconvex":[254],"functions,":[255],"extending":[256],"framework":[258],"(Devolder":[260],"2014).":[263]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403346637","counts_by_year":[],"updated_date":"2024-12-15T07:35:02.238806","created_date":"2024-10-12"}