{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,11,23]],"date-time":"2024-11-23T06:04:59Z","timestamp":1732341899955,"version":"3.28.0"},"reference-count":42,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100012165","name":"Key Technologies Research and Development Program","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012165","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2018AAA0101000"],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62076028"],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Information Sciences"],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.1016\/j.ins.2024.121646","type":"journal-article","created":{"date-parts":[[2024,11,15]],"date-time":"2024-11-15T17:59:06Z","timestamp":1731693546000},"page":"121646","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["GDT: Multi-agent reinforcement learning framework based on adaptive grouping dynamic topological space"],"prefix":"10.1016","volume":"691","author":[{"ORCID":"http:\/\/orcid.org\/0009-0006-9309-5628","authenticated-orcid":false,"given":"Licheng","family":"Sun","sequence":"first","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-5734-3157","authenticated-orcid":false,"given":"Hongbin","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Zhentao","family":"Guo","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.ins.2024.121646_br0010","article-title":"All learning is local: Multi-agent learning in global reward games","volume":"16","author":"Chang","year":"2003","journal-title":"Adv. 
Neural Inf. Process. Syst."},{"key":"10.1016\/j.ins.2024.121646_br0020","article-title":"Learning to communicate with deep multi-agent reinforcement learning","volume":"29","author":"Foerster","year":"2016","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.ins.2024.121646_br0030","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","article-title":"Counterfactual multi-agent policy gradients","author":"Foerster","year":"2018"},{"key":"10.1016\/j.ins.2024.121646_br0040","series-title":"International Conference on Machine Learning","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"Fujimoto","year":"2018"},{"author":"Hao","key":"10.1016\/j.ins.2024.121646_br0050"},{"key":"10.1016\/j.ins.2024.121646_br0060","series-title":"2015 AAAI Fall Symposium Series","article-title":"Deep recurrent q-learning for partially observable MDPs","author":"Hausknecht","year":"2015"},{"key":"10.1016\/j.ins.2024.121646_br0070","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","article-title":"Deep reinforcement learning that matters","author":"Henderson","year":"2018"},{"author":"Hu","key":"10.1016\/j.ins.2024.121646_br0080"},{"key":"10.1016\/j.ins.2024.121646_br0100","series-title":"2021 20th IEEE International Conference on Machine Learning and Applications (ICMLA)","first-page":"1229","article-title":"Leveraging transformers for StarCraft macromanagement prediction","author":"Khan","year":"2021"},{"key":"10.1016\/j.ins.2024.121646_br0110","series-title":"International Conference on Machine Learning","first-page":"5794","article-title":"Revisiting Peng's q (\u03bb) for modern reinforcement learning","author":"Kozuno","year":"2021"},{"key":"10.1016\/j.ins.2024.121646_br0120","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1016\/j.neucom.2016.01.031","article-title":"Multi-agent reinforcement learning as a rehearsal for decentralized planning","volume":"190","author":"Kraemer","year":"2016","journal-title":"Neurocomputing"},{"key":"10.1016\/j.ins.2024.121646_br0130","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"4501","article-title":"Google research football: A novel reinforcement learning environment","author":"Kurach","year":"2020"},{"author":"Li","key":"10.1016\/j.ins.2024.121646_br0140"},{"author":"Lillicrap","key":"10.1016\/j.ins.2024.121646_br0150"},{"author":"Liu","key":"10.1016\/j.ins.2024.121646_br0160"},{"key":"10.1016\/j.ins.2024.121646_br0170","doi-asserted-by":"crossref","DOI":"10.1016\/j.ins.2023.119085","article-title":"Decomposing shared networks for separate cooperation with multi-agent reinforcement learning","volume":"641","author":"Liu","year":"2023","journal-title":"Inf. Sci."},{"key":"10.1016\/j.ins.2024.121646_br0180","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume":"30","author":"Lowe","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.ins.2024.121646_br0190","article-title":"Maven: Multi-agent variational exploration","volume":"32","author":"Mahajan","year":"2019","journal-title":"Adv. Neural Inf. Process. 
Syst."},{"key":"10.1016\/j.ins.2024.121646_br0200","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1017\/S0269888912000057","article-title":"Independent reinforcement learners in cooperative Markov games: a survey regarding coordination problems","volume":"27","author":"Matignon","year":"2012","journal-title":"Knowl. Eng. Rev."},{"key":"10.1016\/j.ins.2024.121646_br0210","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"Mnih","year":"2015","journal-title":"Nature"},{"key":"10.1016\/j.ins.2024.121646_br0220","doi-asserted-by":"crossref","first-page":"289","DOI":"10.1613\/jair.2447","article-title":"Optimal and approximate q-value functions for decentralized POMDPs","volume":"32","author":"Oliehoek","year":"2008","journal-title":"J. Artif. Intell. Res."},{"key":"10.1016\/j.ins.2024.121646_br0230","doi-asserted-by":"crossref","unstructured":"S.C. Ong, S.W. Png, D. Hsu, W.S. Lee, POMDPs for robotic tasks with mixed observability, 2010.","DOI":"10.7551\/mitpress\/8727.003.0027"},{"key":"10.1016\/j.ins.2024.121646_br0240","series-title":"International Conference on Machine Learning","first-page":"7487","article-title":"Stabilizing transformers for reinforcement learning","author":"Parisotto","year":"2020"},{"key":"10.1016\/j.ins.2024.121646_br0250","first-page":"10199","article-title":"Weighted qmix: Expanding monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"33","author":"Rashid","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.ins.2024.121646_br0260","first-page":"1","article-title":"Monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"21","author":"Rashid","year":"2020","journal-title":"J. Mach. Learn. Res."},{"author":"Samvelyan","key":"10.1016\/j.ins.2024.121646_br0270"},{"key":"10.1016\/j.ins.2024.121646_br0280","series-title":"International Conference on Machine Learning","first-page":"5887","article-title":"Qtran: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","author":"Son","year":"2019"},{"key":"10.1016\/j.ins.2024.121646_br0290","article-title":"Learning multiagent communication with backpropagation","volume":"29","author":"Sukhbaatar","year":"2016","journal-title":"Adv. Neural Inf. Process. Syst."},{"author":"Sunehag","key":"10.1016\/j.ins.2024.121646_br0300"},{"key":"10.1016\/j.ins.2024.121646_br0310","series-title":"2020 IEEE International Conference on Robotics and Automation (ICRA)","first-page":"5942","article-title":"Scaled autonomy: Enabling human operators to control robot fleets","author":"Swamy","year":"2020"},{"key":"10.1016\/j.ins.2024.121646_br0320","doi-asserted-by":"crossref","DOI":"10.1371\/journal.pone.0172395","article-title":"Multiagent cooperation and competition with deep reinforcement learning","volume":"12","author":"Tampuu","year":"2017","journal-title":"PLoS ONE"},{"key":"10.1016\/j.ins.2024.121646_br0330","series-title":"Proceedings of the Tenth International Conference on Machine Learning","first-page":"330","article-title":"Multi-agent reinforcement learning: Independent vs. 
cooperative agents","author":"Tan","year":"1993"},{"key":"10.1016\/j.ins.2024.121646_br0340","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","article-title":"Deep reinforcement learning with double q-learning","author":"Van Hasselt","year":"2016"},{"key":"10.1016\/j.ins.2024.121646_br0350","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"author":"Wang","key":"10.1016\/j.ins.2024.121646_br0360"},{"author":"Wang","key":"10.1016\/j.ins.2024.121646_br0370"},{"key":"10.1016\/j.ins.2024.121646_br0380","doi-asserted-by":"crossref","DOI":"10.1016\/j.ins.2024.120560","article-title":"Adaptive mean field multi-agent reinforcement learning","volume":"669","author":"Wang","year":"2024","journal-title":"Inf. Sci."},{"key":"10.1016\/j.ins.2024.121646_br0390","series-title":"International Conference on Machine Learning","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","author":"Wang","year":"2016"},{"key":"10.1016\/j.ins.2024.121646_br0400","unstructured":"C.J.C.H. Watkins, Learning from delayed rewards, 1989."},{"key":"10.1016\/j.ins.2024.121646_br0410","doi-asserted-by":"crossref","DOI":"10.1016\/j.ins.2023.119074","article-title":"A graph neural network based deep reinforcement learning algorithm for multi-agent leader-follower flocking","volume":"641","author":"Xiao","year":"2023","journal-title":"Inf. Sci."},{"author":"Yang","key":"10.1016\/j.ins.2024.121646_br0420"},{"key":"10.1016\/j.ins.2024.121646_br0430","series-title":"International Conference on Machine Learning","first-page":"5571","article-title":"Mean field multi-agent reinforcement learning","author":"Yang","year":"2018"}],"container-title":["Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0020025524015603?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0020025524015603?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,11,23]],"date-time":"2024-11-23T02:58:44Z","timestamp":1732330724000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0020025524015603"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2]]},"references-count":42,"alternative-id":["S0020025524015603"],"URL":"https:\/\/doi.org\/10.1016\/j.ins.2024.121646","relation":{},"ISSN":["0020-0255"],"issn-type":[{"type":"print","value":"0020-0255"}],"subject":[],"published":{"date-parts":[[2025,2]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"GDT: Multi-agent reinforcement learning framework based on adaptive grouping dynamic topological space","name":"articletitle","label":"Article Title"},{"value":"Information Sciences","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.ins.2024.121646","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2024 Elsevier Inc. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"121646"}}