{"id":"https://openalex.org/W4384268425","doi":"https://doi.org/10.48550/arxiv.2307.06175","title":"Learning Decentralized Partially Observable Mean Field Control for Artificial Collective Behavior","display_name":"Learning Decentralized Partially Observable Mean Field Control for Artificial Collective Behavior","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4384268425","doi":"https://doi.org/10.48550/arxiv.2307.06175"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2307.06175","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2307.06175","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082267036","display_name":"Kai Cui","orcid":"https://orcid.org/0000-0002-2605-0386"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cui, Kai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108850552","display_name":"Sascha Hauck","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hauck, Sascha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067265291","display_name":"Christian Fabi\u00e1n","orcid":"https://orcid.org/0000-0003-4239-3861"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fabian, Christian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5070544702","display_name":"Heinz Koeppl","orcid":"https://orcid.org/0000-0002-8305-9379"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Koeppl, Heinz","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":67,"max":78},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7368,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7368,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13553","display_name":"Age of Information Optimization","score":0.7228,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10249","display_name":"Distributed Control Multi-Agent Systems","score":0.6842,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decentralised-system","display_name":"Decentralised system","score":0.49664265},{"id":"https://openalex.org/keywords/observability","display_name":"Observability","score":0.42069605}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7014859},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.65164113},{"id":"https://openalex.org/C32848918","wikidata":"https://www.wikidata.org/wiki/Q845789","display_name":"Observable","level":2,"score":0.62506473},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.51173663},{"id":"https://openalex.org/C205875254","wikidata":"https://www.wikidata.org/wiki/Q17156857","display_name":"Decentralised system","level":3,"score":0.49664265},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4262269},{"id":"https://openalex.org/C36299963","wikidata":"https://www.wikidata.org/wiki/Q1369844","display_name":"Observability","level":2,"score":0.42069605},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39929116},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18515587},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.18002093},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.12583554},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.090257645},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2307.06175","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2307.06175","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2307.06175","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.78,"display_name":"Peace, justice, and strong institutions"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4312300846","https://openalex.org/W4206221578","https://openalex.org/W3157641275","https://openalex.org/W2967463586","https://openalex.org/W2765830098","https://openalex.org/W2517338020","https://openalex.org/W2046459260","https://openalex.org/W1971989957","https://openalex.org/W1500110844","https://openalex.org/W11481024"],"abstract_inverted_index":{"Recent":[0],"reinforcement":[1],"learning":[2],"(RL)":[3],"methods":[4,155],"have":[5],"achieved":[6],"success":[7],"in":[8,18],"various":[9],"domains.":[10],"However,":[11],"multi-agent":[12],"RL":[13,124],"(MARL)":[14],"remains":[15,39],"a":[16,68,104,132,217],"challenge":[17],"terms":[19],"of":[20,34,40,89,107,147,183,222],"decentralization,":[21],"partial":[22,92],"observability":[23],"and":[24,38,55,78,161,203],"scalability":[25],"to":[26,42,71,75,115,144],"many":[27,43],"agents.":[28],"Meanwhile,":[29],"collective":[30,196,224],"behavior":[31,88,197,225],"requires":[32],"resolution":[33],"the":[35],"aforementioned":[36],"challenges,":[37],"importance":[41],"state-of-the-art":[44,175,211],"applications":[45],"such":[46,199],"as":[47,200],"active":[48],"matter":[49],"physics,":[50],"self-organizing":[51],"systems,":[52],"opinion":[53],"dynamics,":[54],"biological":[56],"or":[57],"robotic":[58],"swarms.":[59],"Here,":[60],"MARL":[61,157],"via":[62,158,226],"mean":[63],"field":[64],"control":[65],"(MFC)":[66],"offers":[67],"potential":[69],"solution":[70],"scalability,":[72],"but":[73],"fails":[74],"consider":[76],"decentralized":[77,87,99,162],"partially":[79,100],"observable":[80,101,189],"systems.":[81],"In":[82,170],"this":[83],"paper,":[84],"we":[85,150,172],"enable":[86],"agents":[90,111],"under":[91],"information":[93],"by":[94,178],"proposing":[95],"novel":[96],"models":[97],"for":[98,113,140,156,187],"MFC":[102,177],"(Dec-POMFC),":[103],"broad":[105],"class":[106],"problems":[108],"with":[109,122,137,165,210],"permutation-invariant":[110],"allowing":[112],"reduction":[114],"tractable":[116],"single-agent":[117,123],"Markov":[118],"decision":[119],"processes":[120],"(MDP)":[121],"solution.":[125],"We":[126,191],"provide":[127],"rigorous":[128],"theoretical":[129],"results,":[130],"including":[131],"dynamic":[133],"programming":[134],"principle,":[135],"together":[136,164],"optimality":[138],"guarantees":[139],"Dec-POMFC":[141],"solutions":[142],"applied":[143],"finite":[145],"swarms":[146],"interest.":[148],"Algorithmically,":[149],"propose":[151],"Dec-POMFC-based":[152],"policy":[153,166],"gradient":[154,167],"centralized":[159],"training":[160],"execution,":[163],"approximation":[168],"guarantees.":[169],"addition,":[171],"improve":[173],"upon":[174],"histogram-based":[176],"kernel":[179],"methods,":[180],"which":[181],"is":[182],"separate":[184],"interest":[185],"also":[186],"fully":[188],"MFC.":[190,227],"evaluate":[192],"numerically":[193],"on":[194,208],"representative":[195],"tasks":[198],"adapted":[201],"Kuramoto":[202],"Vicsek":[204],"swarming":[205],"models,":[206],"being":[207],"par":[209],"MARL.":[212],"Overall,":[213],"our":[214],"framework":[215],"takes":[216],"step":[218],"towards":[219],"RL-based":[220],"engineering":[221],"artificial":[223]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4384268425","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-01-04T16:03:03.091846","created_date":"2023-07-14"}