{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T07:17:30Z","timestamp":1725952650118},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,8,26]],"date-time":"2022-08-26T00:00:00Z","timestamp":1661472000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,8,26]],"date-time":"2022-08-26T00:00:00Z","timestamp":1661472000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2020AAA0107400"],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["12071145"],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Open Research Projects of Zhejiang Lab","award":["2021KE0AB03"]},{"name":"Shenzhen Institute of Artificial Intelligence and Robotics for Society"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2022,10]]},"DOI":"10.1007\/s10458-022-09580-8","type":"journal-article","created":{"date-parts":[[2022,8,26]],"date-time":"2022-08-26T04:02:52Z","timestamp":1661486572000},"update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Learning structured communication for multi-agent reinforcement learning"],"prefix":"10.1007","volume":"36","author":[{"given":"Junjie","family":"Sheng","sequence":"first","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0003-3064-5128","authenticated-orcid":false,"given":"Xiangfeng","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Bo","family":"Jin","sequence":"additional","affiliation":[]},{"given":"Junchi","family":"Yan","sequence":"additional","affiliation":[]},{"given":"Wenhao","family":"Li","sequence":"additional","affiliation":[]},{"given":"Tsung-Hui","family":"Chang","sequence":"additional","affiliation":[]},{"given":"Jun","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Hongyuan","family":"Zha","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,8,26]]},"reference":[{"issue":"5\u20136","key":"9580_CR1","doi-asserted-by":"publisher","first-page":"433","DOI":"10.1016\/S0968-090X(02)00030-X","volume":"10","author":"JL Adler","year":"2002","unstructured":"Adler, J. L., & Blue, V. J. (2002). A cooperative multi-agent transportation management and route guidance system. Transportation Research Part C: Emerging Technologies, 10(5\u20136), 433\u2013454.","journal-title":"Transportation Research Part C: Emerging Technologies"},{"key":"9580_CR2","unstructured":"Battaglia, P.W., Hamrick, J.B., Bapst, V., Sanchez-Gonzalez, A., Zambaldi, V., Malinowski, M., Tacchetti, A., Raposo, D., Santoro, A., Faulkner, R., et\u00a0al. (2018) Relational inductive biases, deep learning, and graph networks. arXiv preprint arXiv:1806.01261"},{"key":"9580_CR3","unstructured":"Bellemare, M.G., Dabney, W., & Munos, R. (2017). A distributional perspective on reinforcement learning. In International Conference on Machine Learning (pp. 449\u2013458). JMLR. org."},{"key":"9580_CR4","unstructured":"Das, A., Gervet, T., Romoff, J., Batra, D., Parikh, D., Rabbat, M., & Pineau, J. (2019). TarMAC: Targeted multi-agent communication. In International Conference on Machine Learning (pp. 1538\u20131546)."},{"key":"9580_CR5","unstructured":"Foerster, J., Assael, I.A., De Freitas, N., & Whiteson, S. (2016). Learning to communicate with deep multi-agent reinforcement learning. In Neural Information Processing Systems (pp. 2137\u20132145)."},{"key":"9580_CR6","doi-asserted-by":"crossref","unstructured":"Foerster, J.N., Farquhar, G., Afouras, T., Nardelli, N., & Whiteson, S. (2018). Counterfactual multi-agent policy gradients. In Association for the Advance of Artificial Intelligence (pp. 2974\u20132982).","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"9580_CR7","unstructured":"Gilmer, J., Schoenholz, S.S., Riley, P.F., Vinyals, O., & Dahl, G.E. (2017). Neural message passing for quantum chemistry. In International Conference on Machine Learning (pp. 1263\u20131272)."},{"key":"9580_CR8","unstructured":"Gilmer, J., Schoenholz, S.S., Riley, P.F., Vinyals, O., & Dahl, G.E. (2017). Neural message passing for quantum chemistry. In International Conference on Machine Learning (pp. 1263\u20131272)."},{"key":"9580_CR9","unstructured":"Iqbal, S., & Sha, F. (2019). Actor-attention-critic for multi-agent reinforcement learning. In International Conference on Machine Learning (pp. 2961\u20132970)."},{"key":"9580_CR10","unstructured":"Jiang, J., Dun, C., Huang, T., & Lu, Z. (2020) Graph convolutional reinforcement learning. In International Conference on Learning Representations."},{"key":"9580_CR11","unstructured":"Jiang, J., & Lu, Z. (2018) Learning attentional communication for multi-agent cooperation. In Neural Information Processing Systems (pp. 7254\u20137264)."},{"key":"9580_CR12","unstructured":"Kim, D., Moon, S., Hostallero, D., Kang, W.J., Lee, T., Son, K., & Yi, Y. (2019) Learning to schedule communication in multi-agent reinforcement learning. In International Conference on Learning Representations."},{"key":"9580_CR13","unstructured":"Lazaridou, A., Peysakhovich, A., & Baroni, M. (2016) Multi-agent cooperation and the emergence of (natural) language. arXiv preprint arXiv:1612.07182"},{"issue":"7553","key":"9580_CR14","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521(7553), 436\u2013444.","journal-title":"Nature"},{"key":"9580_CR15","first-page":"05493","volume":"1511","author":"Y Li","year":"2016","unstructured":"Li, Y., Tarlow, D., Brockschmidt, M., & Zemel, R. S. (2016). Gated graph sequence neural networks. International Conference on Learning Representations 1511, 05493.","journal-title":"International Conference on Learning Representations"},{"key":"9580_CR16","unstructured":"Lillicrap, T., Hunt, J., Pritzel, A., Heess, N., Erez, T., Tassa, Y., Silver, D., & Wierstra, D. (2016). Continuous control with deep reinforcement learning. In International Conference on Learning Representations."},{"key":"9580_CR17","unstructured":"Liu, I.J., Yeh, R.A., & Schwing, A.G. (2019). Pic: Permutation invariant critic for multi-agent deep reinforcement learning. (pp. 590\u2013602). PMLR."},{"key":"9580_CR18","unstructured":"Lowe, R., Wu, Y., Tamar, A., Harb, J., Abbeel, O.P., & Mordatch, I. (2017). Multi-agent actor-critic for mixed cooperative-competitive environments. In Neural Information Processing Systems (pp. 6379\u20136390)."},{"key":"9580_CR19","unstructured":"Malysheva, A., Sung, T.T., Sohn, C.B., Kudenko, D., & Shpilman, A. (2018) Deep multi-agent reinforcement learning with relevance graphs. arXiv preprint arXiv:1811.12557"},{"issue":"7540","key":"9580_CR20","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A. A., Veness, J., Bellemare, M. G., Graves, A., Riedmiller, M. A., Fidjeland, A., Ostrovski, G., Petersen, S., Beattie, C., Sadik, A., Antonoglou, I., King, H., Kumaran, D., Wierstra, D., Legg, S., & Hassabis, D. (2015). Human-level control through deep reinforcement learning. Nature, 518(7540), 529\u2013533.","journal-title":"Nature"},{"key":"9580_CR21","doi-asserted-by":"publisher","unstructured":"Mordatch, Igor, & Abbeel, Pieter. (2018). Emergence of Grounded Compositional Language in Multi-Agent Populations. Proceedings of the AAAI Conference on Artificial Intelligence, 32(1). https:\/\/doi.org\/10.1609\/aaai.v32i1.11492","DOI":"10.1609\/aaai.v32i1.11492"},{"key":"9580_CR22","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1016\/j.future.2020.07.032","volume":"114","author":"Danilo Pianini","year":"2021","unstructured":"Pianini, Danilo, Casadei, Roberto, Viroli, Mirko, & Natali, Antonio. (2021). Partitioned integration and coordination via the self-organising coordination regions pattern. Future Generation Computer Systems, 114, 44\u201368. https:\/\/doi.org\/10.1016\/j.future.2020.07.032","journal-title":"Future Generation Computer Systems,"},{"key":"9580_CR23","unstructured":"Raposo, D., Santoro, A., Barrett, D., Pascanu, R., Lillicrap, T., & Battaglia, P. (2017) Discovering objects and their relations from entangled scene representations. arXiv preprint arXiv:1702.05068 1702.05068."},{"key":"9580_CR24","unstructured":"Rezaee, M., & Yaghmaee, M. (2009). Cluster based routing protocol for mobile ad hoc networks. INFOCOM, 8(1), 30\u201336."},{"issue":"05","key":"9580_CR25","doi-asserted-by":"publisher","first-page":"7236","DOI":"10.1609\/aaai.v34i05.6214","volume":"34","author":"Heechang Ryu","year":"2020","unstructured":"Ryu, Heechang, Shin, Hayong, & Park, Jinkyoo. (2020). Multi-Agent Actor-Critic with Hierarchical Graph Attention Network. Proceedings of the AAAI Conference on Artificial Intelligence, 34(05), 7236\u20137243. https:\/\/doi.org\/10.1609\/aaai.v34i05.6214","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence,"},{"key":"9580_CR26","doi-asserted-by":"crossref","unstructured":"Scarselli, F., Gori, M., Tsoi, A. C., Hagenbuchner, M., & Monfardini, G. (2008). Computational capabilities of graph neural networks. IEEE Transactions on Neural Networks, 20(1), 81\u2013102.","DOI":"10.1109\/TNN.2008.2005141"},{"issue":"10","key":"9580_CR27","doi-asserted-by":"publisher","first-page":"2205","DOI":"10.1016\/j.automatica.2009.06.006","volume":"45","author":"E. Semsar-Kazerooni","year":"2009","unstructured":"Semsar-Kazerooni, E. ., & Khorasani, K. . (2009). Multi-agent team cooperation: A game theory approach. Automatica, 45(10), 2205\u20132213. https:\/\/doi.org\/10.1016\/j.automatica.2009.06.006","journal-title":"Automatica,"},{"issue":"7587","key":"9580_CR28","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"David Silver","year":"2016","unstructured":"Silver, David, Huang, Aja, Maddison, Chris J.., Guez, Arthur, Sifre, Laurent, van den Driessche, George, Schrittwieser, Julian, Antonoglou, Ioannis, Panneershelvam, Veda, Lanctot, Marc, Dieleman, Sander, Grewe, Dominik, Nham, John, Kalchbrenner, Nal, Sutskever, Ilya, Lillicrap, Timothy, Leach, Madeleine, Kavukcuoglu, Koray, Graepel, Thore, & Hassabis, Demis. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529(7587), 484\u2013489. https:\/\/doi.org\/10.1038\/nature16961","journal-title":"Nature,"},{"key":"9580_CR29","unstructured":"Singh, A., Jain, T., & Sukhbaatar, S. (2019). Learning when to communicate at scale in multiagent cooperative and competitive tasks. In International Conference on Learning Representations. 1812.09755."},{"key":"9580_CR30","unstructured":"Sukhbaatar, S., Fergus, R., et al. (2016) Learning multiagent communication with backpropagation. In Neural Information Processing Systems, pp. 2244\u20132252."},{"issue":"5","key":"9580_CR31","doi-asserted-by":"publisher","first-page":"1054","DOI":"10.1109\/TNN.1998.712192","volume":"9","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R. .S. ., & Barto, A. .G. . (1998). Reinforcement Learning: An Introduction. IEEE Transactions on Neural Networks, 9(5), 1054\u20131054. https:\/\/doi.org\/10.1109\/TNN.1998.712192","journal-title":"IEEE Transactions on Neural Networks,"},{"key":"9580_CR32","unstructured":"Tacchetti, A., Song, H.F., Mediano, P.A.M., Zambaldi, V., Kram\u00e1r, J., Rabinowitz, N.C., Graepel, T., Botvinick, M., & Battaglia, P.W. (2019). Relational forward models for multi-agent learning. In International Conference on Learning Representations. 1809.11044."},{"key":"9580_CR33","doi-asserted-by":"crossref","unstructured":"Tampuu, A., Matiisen, T., Kodelja, D., Kuzovkin, I., Korjus, K., Aru, J., Aru, J., & Vicente, R. (2017). Multiagent cooperation and competition with deep reinforcement learning. PLOS ONE, 12(4), 1\u201315.","DOI":"10.1371\/journal.pone.0172395"},{"key":"9580_CR34","doi-asserted-by":"crossref","unstructured":"Tan, M. (1993). Multi-agent reinforcement learning: Independent vs. cooperative agents. In International Conference on Machine Learning (pp. 330\u2013337).","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"9580_CR35","doi-asserted-by":"publisher","unstructured":"Van Hasselt, Hado, Guez, Arthur, & Silver, David. (2016). Deep Reinforcement Learning with Double Q-Learning. Proceedings of the AAAI Conference on Artificial Intelligence, 30(1). https:\/\/doi.org\/10.1609\/aaai.v30i1.10295","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"9580_CR36","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., & Polosukhin, I. (2017). Attention is all you need. In Advances in Neural Information Processing Systems (pp. 5998\u20136008)."},{"key":"9580_CR37","doi-asserted-by":"publisher","first-page":"100486","DOI":"10.1016\/j.jlamp.2019.100486","volume":"109","author":"Mirko Viroli","year":"2019","unstructured":"Viroli, Mirko, Beal, Jacob, Damiani, Ferruccio, Audrito, Giorgio, Casadei, Roberto, & Pianini, Danilo. (2019). From distributed coordination to field calculus and aggregate computing. Journal of Logical and Algebraic Methods in Programming, 109, 100486. https:\/\/doi.org\/10.1016\/j.jlamp.2019.100486","journal-title":"Journal of Logical and Algebraic Methods in Programming,"},{"issue":"05","key":"9580_CR38","doi-asserted-by":"publisher","first-page":"7293","DOI":"10.1609\/aaai.v34i05.6221","volume":"34","author":"Weixun Wang","year":"2020","unstructured":"Wang, Weixun, Yang, Tianpei, Liu, Yong, Hao, Jianye, Hao, Xiaotian, Hu, Yujing, Chen, Yingfeng, Fan, Changjie, & Gao, Yang. (2020). From Few to More: Large-Scale Dynamic Multiagent Curriculum Learning. Proceedings of the AAAI Conference on Artificial Intelligence, 34(05), 7293\u20137300. https:\/\/doi.org\/10.1609\/aaai.v34i05.6221","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence,"},{"issue":"1","key":"9580_CR39","doi-asserted-by":"publisher","first-page":"262","DOI":"10.1016\/j.engappai.2012.01.008","volume":"26","author":"Xingwei Wang","year":"2013","unstructured":"Wang, Xingwei, Cheng, Hui, & Huang, Min. (2013). Multi-robot navigation based QoS routing in self-organizing networks. Engineering Applications of Artificial Intelligence, 26(1), 262\u2013272. https:\/\/doi.org\/10.1016\/j.engappai.2012.01.008","journal-title":"Engineering Applications of Artificial Intelligence,"},{"key":"9580_CR40","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., & He, K. (2018) Non-local neural networks. In IEEE conference on Computer Vision and Pattern Recognition (pp. 7794\u20137803).","DOI":"10.1109\/CVPR.2018.00813"},{"key":"9580_CR41","unstructured":"Wang, Z., Schaul, T., Hessel, M., Van Hasselt, H., Lanctot, M., & De Freitas, N. (2016). Dueling network architectures for deep reinforcement learning. In International Conference on Machine Learning (pp. 1995\u20132003)."},{"key":"9580_CR42","doi-asserted-by":"crossref","unstructured":"Watkins, C. J., & Dayan, P. (1992). Q-learning. Machine Learning, 8(3\u20134), 279\u2013292.","DOI":"10.1023\/A:1022676722315"},{"key":"9580_CR43","doi-asserted-by":"publisher","first-page":"497","DOI":"10.1007\/3-540-45023-8_48","volume-title":"Multi-agent systems and applications III,","author":"Danny Weyns","year":"2003","unstructured":"Weyns, Danny, & Holvoet, Tom. (2003). Regional synchronization for simultaneous actions in situated multi-agent systems. In Vladim\u00edr Ma\u0159\u00edk, Michal P\u011bchou\u010dek, & J\u00f6rg. M\u00fcller (Eds.), Multi-agent systems and applications III, (pp. 497\u2013510). Berlin, Heidelberg: Springer Berlin Heidelberg. https:\/\/doi.org\/10.1007\/3-540-45023-8_48"},{"issue":"2","key":"9580_CR44","doi-asserted-by":"publisher","first-page":"487","DOI":"10.1016\/j.artint.2010.09.008","volume":"175","author":"Feng Wu","year":"2011","unstructured":"Wu, Feng, Zilberstein, Shlomo, & Chen, Xiaoping. (2011). Online planning for multi-agent systems with bounded communication. Artificial Intelligence, 175(2), 487\u2013511. https:\/\/doi.org\/10.1016\/j.artint.2010.09.008","journal-title":"Artificial Intelligence,"},{"key":"9580_CR45","unstructured":"Yang, Y., Luo, R., Li, M., Zhou, M., Zhang, W., & Wang, J. (2018). Mean field multi-agent reinforcement learning. In International Conference on Machine Learning (pp. 5571\u20135580)."},{"key":"9580_CR46","unstructured":"Ying, Z., You, J., Morris, C., Ren, X., Hamilton, W., & Leskovec, J. (2018). Hierarchical graph representation learning with differentiable pooling. In Neural Information Processing Systems (pp. 4800\u20134810)."},{"key":"9580_CR47","unstructured":"Zhang, C., & Lesser, V. (2013). Coordinating multi-agent reinforcement learning with limited communication. In International conference on Autonomous Agents and Multi-Agent Systems (pp. 1101\u20131108)."},{"key":"9580_CR48","unstructured":"Zhang, C., & Lesser, V. (2013). Coordinating multi-agent reinforcement learning with limited communication. In International conference on Autonomous Agents and Multi-Agent Systems. 1902.01554."}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-022-09580-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10458-022-09580-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-022-09580-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,29]],"date-time":"2022-10-29T12:54:45Z","timestamp":1667048085000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10458-022-09580-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,26]]},"references-count":48,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2022,10]]}},"alternative-id":["9580"],"URL":"https:\/\/doi.org\/10.1007\/s10458-022-09580-8","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"value":"1387-2532","type":"print"},{"value":"1573-7454","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,8,26]]},"assertion":[{"value":"10 August 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 August 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no conflicts of interest to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"50"}}