{"id":"https://openalex.org/W4360599120","doi":"https://doi.org/10.48550/arxiv.2303.10665","title":"Multi-Agent Reinforcement Learning via Mean Field Control: Common Noise, Major Agents and Approximation Properties","display_name":"Multi-Agent Reinforcement Learning via Mean Field Control: Common Noise, Major Agents and Approximation Properties","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4360599120","doi":"https://doi.org/10.48550/arxiv.2303.10665"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2303.10665","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2303.10665","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082267036","display_name":"Kai Cui","orcid":"https://orcid.org/0000-0002-2605-0386"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cui, Kai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067265291","display_name":"Christian Fabi\u00e1n","orcid":"https://orcid.org/0000-0003-4239-3861"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fabian, Christian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5070544702","display_name":"Heinz Koeppl","orcid":"https://orcid.org/0000-0002-8305-9379"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Koeppl, Heinz","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.78898,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":84,"max":87},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9512,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9512,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9479,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10471","display_name":"Climate Change Policy and Economics","score":0.9242,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/minor","display_name":"Minor (academic)","score":0.80586237}],"concepts":[{"id":"https://openalex.org/C2779760435","wikidata":"https://www.wikidata.org/wiki/Q5396169","display_name":"Minor (academic)","level":2,"score":0.80586237},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.70582443},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5907951},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.58430636},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.5753967},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.54388785},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5302092},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.41747904},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.39333677},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.35743612},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.20664081},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2303.10665","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2303.10665","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2303.10665","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.8,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, justice, and strong institutions"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4389475841","https://openalex.org/W4285537323","https://openalex.org/W4241327272","https://openalex.org/W4211224558","https://openalex.org/W2903299703","https://openalex.org/W2379312070","https://openalex.org/W2120406836","https://openalex.org/W2117282672","https://openalex.org/W1996214847","https://openalex.org/W1932159282"],"abstract_inverted_index":{"Recently,":[0],"mean":[1,61,147],"field":[2,62,148],"control":[3,63],"(MFC)":[4],"has":[5],"provided":[6],"a":[7,39,122,155],"tractable":[8],"and":[9,47,54,111,161],"theoretically":[10],"founded":[11],"approach":[12],"to":[13,49,67,96],"otherwise":[14],"difficult":[15],"cooperative":[16],"multi-agent":[17,118,157],"control.":[18],"However,":[19],"the":[20,84,116,131,145],"strict":[21],"assumption":[22],"of":[23,43,136],"many":[24,51],"independent,":[25],"homogeneous":[26],"agents":[27,53,58,82],"may":[28],"be":[29],"too":[30],"stringent":[31],"in":[32,115,165],"practice.":[33],"In":[34,65,130],"this":[35],"work,":[36],"we":[37,100,143],"propose":[38,144],"novel":[40,106,156],"discrete-time":[41],"generalization":[42],"Markov":[44],"decision":[45],"processes":[46],"MFC":[48,113],"both":[50,109],"minor":[52,74,81],"potentially":[55],"complex":[56],"major":[57,85],"--":[59],"major-minor":[60,146],"(M3FC).":[64],"contrast":[66],"deterministic":[68],"MFC,":[69],"M3FC":[70,110],"allows":[71],"for":[72,108,126],"stochastic":[73],"agent":[75,86],"distributions":[76],"with":[77,105,121],"strong":[78],"correlation":[79],"between":[80],"through":[83],"state,":[87],"which":[88],"can":[89],"model":[90],"arbitrary":[91],"problem":[92],"details":[93],"not":[94],"bound":[95],"any":[97],"agent.":[98],"Theoretically,":[99],"give":[101],"rigorous":[102],"approximation":[103],"properties":[104],"proofs":[107],"existing":[112],"models":[114],"finite":[117],"problem,":[119],"together":[120],"dynamic":[123],"programming":[124],"principle":[125],"solving":[127],"such":[128],"problems.":[129,168],"infinite-horizon":[132],"discounted":[133],"case,":[134],"existence":[135],"an":[137],"optimal":[138],"stationary":[139],"policy":[140,150],"follows.":[141],"Algorithmically,":[142],"proximal":[149],"optimization":[151],"algorithm":[152,160],"(M3FPPO)":[153],"as":[154],"reinforcement":[158],"learning":[159],"demonstrate":[162],"its":[163],"success":[164],"illustrative":[166],"M3FC-type":[167]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4360599120","counts_by_year":[{"year":2024,"cited_by_count":3}],"updated_date":"2025-01-16T18:14:17.513692","created_date":"2023-03-24"}