{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T13:40:21Z","timestamp":1732801221194,"version":"3.29.0"},"reference-count":63,"publisher":"Springer Science and Business Media LLC","issue":"7782","license":[{"start":{"date-parts":[[2019,10,30]],"date-time":"2019-10-30T00:00:00Z","timestamp":1572393600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2019,10,30]],"date-time":"2019-10-30T00:00:00Z","timestamp":1572393600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nature"],"published-print":{"date-parts":[[2019,11,14]]},"DOI":"10.1038\/s41586-019-1724-z","type":"journal-article","created":{"date-parts":[[2019,10,30]],"date-time":"2019-10-30T23:03:36Z","timestamp":1572476616000},"page":"350-354","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Grandmaster level in StarCraft II using multi-agent reinforcement learning"],"prefix":"10.1038","volume":"575","author":[{"given":"Oriol","family":"Vinyals","sequence":"first","affiliation":[]},{"given":"Igor","family":"Babuschkin","sequence":"additional","affiliation":[]},{"given":"Wojciech M.","family":"Czarnecki","sequence":"additional","affiliation":[]},{"given":"Micha\u00ebl","family":"Mathieu","sequence":"additional","affiliation":[]},{"given":"Andrew","family":"Dudzik","sequence":"additional","affiliation":[]},{"given":"Junyoung","family":"Chung","sequence":"additional","affiliation":[]},{"given":"David H.","family":"Choi","sequence":"additional","affiliation":[]},{"given":"Richard","family":"Powell","sequence":"additional","affiliation":[]},{"given":"Timo","family":"Ewalds","sequence":"additional","affiliation":[]},{"given":"Petko","family":"Georgiev","sequence":"additional","affiliation":[]},{"given":"Junhyuk","family":"Oh","sequence":"additional","affiliation":[]},{"given":"Dan","family":"Horgan","sequence":"additional","affiliation":[]},{"given":"Manuel","family":"Kroiss","sequence":"additional","affiliation":[]},{"given":"Ivo","family":"Danihelka","sequence":"additional","affiliation":[]},{"given":"Aja","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Laurent","family":"Sifre","sequence":"additional","affiliation":[]},{"given":"Trevor","family":"Cai","sequence":"additional","affiliation":[]},{"given":"John P.","family":"Agapiou","sequence":"additional","affiliation":[]},{"given":"Max","family":"Jaderberg","sequence":"additional","affiliation":[]},{"given":"Alexander S.","family":"Vezhnevets","sequence":"additional","affiliation":[]},{"given":"R\u00e9mi","family":"Leblond","sequence":"additional","affiliation":[]},{"given":"Tobias","family":"Pohlen","sequence":"additional","affiliation":[]},{"given":"Valentin","family":"Dalibard","sequence":"additional","affiliation":[]},{"given":"David","family":"Budden","sequence":"additional","affiliation":[]},{"given":"Yury","family":"Sulsky","sequence":"additional","affiliation":[]},{"given":"James","family":"Molloy","sequence":"additional","affiliation":[]},{"given":"Tom 
L.","family":"Paine","sequence":"additional","affiliation":[]},{"given":"Caglar","family":"Gulcehre","sequence":"additional","affiliation":[]},{"given":"Ziyu","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Tobias","family":"Pfaff","sequence":"additional","affiliation":[]},{"given":"Yuhuai","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Roman","family":"Ring","sequence":"additional","affiliation":[]},{"given":"Dani","family":"Yogatama","sequence":"additional","affiliation":[]},{"given":"Dario","family":"W\u00fcnsch","sequence":"additional","affiliation":[]},{"given":"Katrina","family":"McKinney","sequence":"additional","affiliation":[]},{"given":"Oliver","family":"Smith","sequence":"additional","affiliation":[]},{"given":"Tom","family":"Schaul","sequence":"additional","affiliation":[]},{"given":"Timothy","family":"Lillicrap","sequence":"additional","affiliation":[]},{"given":"Koray","family":"Kavukcuoglu","sequence":"additional","affiliation":[]},{"given":"Demis","family":"Hassabis","sequence":"additional","affiliation":[]},{"given":"Chris","family":"Apps","sequence":"additional","affiliation":[]},{"given":"David","family":"Silver","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,10,30]]},"reference":[{"key":"1724_CR1","unstructured":"AIIDE StarCraft AI Competition. https:\/\/www.cs.mun.ca\/dchurchill\/starcraftaicomp\/."},{"key":"1724_CR2","unstructured":"Student StarCraft AI Tournament and Ladder. https:\/\/sscaitournament.com\/."},{"key":"1724_CR3","unstructured":"Starcraft 2 AI ladder. https:\/\/sc2ai.net\/."},{"key":"1724_CR4","unstructured":"Churchill, D., Lin, Z. & Synnaeve, G. An analysis of model-based heuristic search techniques for StarCraft combat scenarios. in Artificial Intelligence and Interactive Digital Entertainment Conf. (AAAI, 2017)."},{"key":"1724_CR5","doi-asserted-by":"crossref","unstructured":"Sutton, R. & Barto, A. Reinforcement Learning: An Introduction (MIT Press, 1998).","DOI":"10.1109\/TNN.1998.712192"},{"key":"1724_CR6","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y. & Hinton, G. Deep learning. Nature 521, 436\u2013444 (2015).","journal-title":"Nature"},{"key":"1724_CR7","unstructured":"Vinyals, O. et al. StarCraft II: a new challenge for reinforcement learning. Preprint at https:\/\/arxiv.org\/abs\/1708.04782 (2017)."},{"key":"1724_CR8","first-page":"5998","volume":"30","author":"A Vaswani","year":"2017","unstructured":"Vaswani, A. et al. Attention is all you need. Adv. Neural Information Process. Syst. 30, 5998\u20136008 (2017).","journal-title":"Neural Information Process. Syst."},{"key":"1724_CR9","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S. & Schmidhuber, J. Long short-term memory. Neural Comput. 9, 1735\u20131780 (1997).","journal-title":"Neural Comput."},{"key":"1724_CR10","doi-asserted-by":"crossref","first-page":"1045","DOI":"10.21437\/Interspeech.2010-343","volume":"2010","author":"T Mikolov","year":"2010","unstructured":"Mikolov, T., Karafiat, M., Burget, L., Cernocky, J. & Khudanpur, S. Recurrent neural network based language model. INTERSPEECH-2010 1045\u20131048 (2010).","journal-title":"INTERSPEECH"},{"key":"1724_CR11","unstructured":"Metz, L., Ibarz, J., Jaitly, N. & Davidson, J. 
Discrete sequential prediction of continuous actions for deep RL. Preprint at https:\/\/arxiv.org\/abs\/1705.05035v3 (2017)."},{"key":"1724_CR12","first-page":"2692","volume":"28","author":"O Vinyals","year":"2015","unstructured":"Vinyals, O., Fortunato, M. & Jaitly, N. Pointer networks. Adv. Neural Information Process. Syst. 28, 2692\u20132700 (2015).","journal-title":"Adv. Neural Information Process. Syst."},{"key":"1724_CR13","first-page":"1928","volume":"48","author":"V Mnih","year":"2016","unstructured":"Mnih, V. et al. Asynchronous methods for deep reinforcement learning. Proc. Machine Learning Res. 48, 1928\u20131937 (2016).","journal-title":"Proc. Machine Learning Res."},{"key":"1724_CR14","first-page":"1407","volume":"80","author":"L Espeholt","year":"2018","unstructured":"Espeholt, L. et al. IMPALA: scalable distributed deep-RL with importance weighted actor-learner architectures. Proc. Machine Learning Res. 80, 1407\u20131416 (2018).","journal-title":"Proc. Machine Learning Res."},{"key":"1724_CR15","unstructured":"Wang, Z. et al. Sample efficient actor-critic with experience replay. Preprint at https:\/\/arxiv.org\/abs\/1611.01224v2 (2017)."},{"key":"1724_CR16","first-page":"9","volume":"3","author":"R Sutton","year":"1988","unstructured":"Sutton, R. Learning to predict by the method of temporal differences. Mach. Learn. 3, 9\u201344 (1988).","journal-title":"Mach. Learn."},{"key":"1724_CR17","first-page":"3875","volume":"80","author":"J Oh","year":"2018","unstructured":"Oh, J., Guo, Y., Singh, S. & Lee, H. Self-Imitation Learning. Proc. Machine Learning Res. 80, 3875\u20133884 (2018).","journal-title":"Proc. Machine Learning Res."},{"key":"1724_CR18","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1126\/science.aar6404","volume":"362","author":"D Silver","year":"2018","unstructured":"Silver, D. et al. A general reinforcement learning algorithm that masters chess, shogi, and Go through self-play. Science 362, 1140\u20131144 (2018).","journal-title":"Science"},{"key":"1724_CR19","first-page":"434","volume":"97","author":"D Balduzzi","year":"2019","unstructured":"Balduzzi, D. et al. Open-ended learning in symmetric zero-sum games. Proc. Machine Learning Res. 97, 434\u2013443 (2019).","journal-title":"Proc. Machine Learning Res."},{"key":"1724_CR20","first-page":"374","volume":"13","author":"GW Brown","year":"1951","unstructured":"Brown, G. W. Iterative solution of games by fictitious play. Act. Anal. Prod. Alloc. 13, 374\u2013376 (1951).","journal-title":"Act. Anal. Prod. Alloc."},{"key":"1724_CR21","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1016\/j.geb.2005.08.005","volume":"56","author":"DS Leslie","year":"2006","unstructured":"Leslie, D. S. & Collins, E. J. Generalised weakened fictitious play. Games Econ. Behav. 56, 285\u2013298 (2006).","journal-title":"Games Econ. Behav."},{"key":"1724_CR22","first-page":"805","volume":"32","author":"J Heinrich","year":"2015","unstructured":"Heinrich, J., Lanctot, M. & Silver, D. Fictitious self-play in extensive-form games. Proc. Intl Conf. Machine Learning 32, 805\u2013813 (2015).","journal-title":"Proc. Intl Conf. Machine Learning"},{"key":"1724_CR23","unstructured":"Jouppi, N. P. et al. In-datacenter performance analysis of a tensor processing unit. Preprint at https:\/\/arxiv.org\/abs\/1704.04760v1 (2017)."},{"key":"1724_CR24","unstructured":"Elo, A. E. 
The Rating of Chessplayers, Past and Present (Arco, 1978)."},{"key":"1724_CR25","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1016\/S0004-3702(01)00129-1","volume":"134","author":"M Campbell","year":"2002","unstructured":"Campbell, M., Hoane, A. & Hsu, F. Deep Blue. Artif. Intell. 134, 57\u201383 (2002).","journal-title":"Artif. Intell."},{"key":"1724_CR26","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D. et al. Mastering the game of Go with deep neural networks and tree search. Nature 529, 484\u2013489 (2016).","journal-title":"Nature"},{"key":"1724_CR27","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V. et al. Human-level control through deep reinforcement learning. Nature 518, 529\u2013533 (2015).","journal-title":"Nature"},{"key":"1724_CR28","doi-asserted-by":"crossref","unstructured":"Pathak, D., Agrawal, P., Efros, A. A. & Darrell, T. Curiosity-driven exploration by self-supervised prediction. Proc. IEEE Conf. Computer Vision Pattern Recognition Workshops 16\u201317 (IEEE, 2017).","DOI":"10.1109\/CVPRW.2017.70"},{"key":"1724_CR29","doi-asserted-by":"publisher","first-page":"859","DOI":"10.1126\/science.aau6249","volume":"364","author":"M Jaderberg","year":"2019","unstructured":"Jaderberg, M. et al. Human-level performance in 3D multiplayer games with population-based reinforcement learning. Science 364, 859\u2013865 (2019).","journal-title":"Science"},{"key":"1724_CR30","unstructured":"OpenAI OpenAI Five. https:\/\/blog.openai.com\/openai-five\/ (2018)."},{"key":"1724_CR31","unstructured":"Buro, M. Real-time strategy games: a new AI research challenge. Intl Joint Conf. Artificial Intelligence 1534\u20131535 (2003)."},{"key":"1724_CR32","unstructured":"Samvelyan, M. et al. The StarCraft multi-agent challenge. Intl Conf. Autonomous Agents and MultiAgent Systems 2186\u20132188 (2019)."},{"key":"1724_CR33","unstructured":"Zambaldi, V. et al. Relational deep reinforcement learning. Preprint at https:\/\/arxiv.org\/abs\/1806.01830v2 (2018)."},{"key":"1724_CR34","unstructured":"Usunier, N., Synnaeve, G., Lin, Z. & Chintala, S. Episodic exploration for deep deterministic policies: an application to StarCraft micromanagement tasks. Preprint at https:\/\/arxiv.org\/abs\/1609.02993v3 (2017)."},{"key":"1724_CR35","doi-asserted-by":"crossref","unstructured":"Weber, B. G. & Mateas, M. Case-based reasoning for build order in real-time strategy games. AIIDE \u201909 Proc. 5th AAAI Conf. Artificial Intelligence and Interactive Digital Entertainment 106\u2013111 (2009).","DOI":"10.1609\/aiide.v5i1.12360"},{"key":"1724_CR36","doi-asserted-by":"crossref","unstructured":"Buro, M. ORTS: a hack-free RTS game environment. Intl Conf. Computers and Games 280\u2013291 (Springer, 2002).","DOI":"10.1007\/978-3-540-40031-8_19"},{"key":"1724_CR37","unstructured":"Churchill, D. SparCraft: open source StarCraft combat simulation. https:\/\/code.google.com\/archive\/p\/sparcraft\/ (2013)."},{"key":"1724_CR38","doi-asserted-by":"crossref","unstructured":"Weber, B. G. AIIDE 2010 StarCraft competition. Artificial Intelligence and Interactive Digital Entertainment Conf. (2010).","DOI":"10.1609\/aiide.v6i1.12384"},{"key":"1724_CR39","unstructured":"Uriarte, A. & Onta\u00f1\u00f3n, S. Improving Monte Carlo tree search policies in StarCraft via probabilistic models learned from replay data. 
Artificial Intelligence and Interactive Digital Entertainment Conf. 101\u2013106 (2016)."},{"key":"1724_CR40","doi-asserted-by":"crossref","unstructured":"Hsieh, J.-L. & Sun, C.-T. Building a player strategy model by analyzing replays of real-time strategy games. IEEE Intl Joint Conf. Neural Networks 3106\u20133111 (2008).","DOI":"10.1109\/IJCNN.2008.4634237"},{"key":"1724_CR41","doi-asserted-by":"crossref","unstructured":"Synnaeve, G. & Bessiere, P. A Bayesian model for plan recognition in RTS games applied to StarCraft. Artificial Intelligence and Interactive Digital Entertainment Conf. 79\u201384 (2011).","DOI":"10.1609\/aiide.v7i1.12429"},{"key":"1724_CR42","first-page":"73","volume":"3","author":"K Shao","year":"2019","unstructured":"Shao, K., Zhu, Y. & Zhao, D. StarCraft micromanagement with reinforcement learning and curriculum transfer learning. IEEE Trans. Emerg. Top. Comput. Intell. 3, 73\u201384 (2019).","journal-title":"Top. Comput. Intell."},{"key":"1724_CR43","unstructured":"Facebook CherryPi. https:\/\/torchcraft.github.io\/TorchCraftAI\/."},{"key":"1724_CR44","unstructured":"Berkeley Overmind. https:\/\/www.icsi.berkeley.edu\/icsi\/news\/2010\/10\/klein-berkeley-overmind (2010)."},{"key":"1724_CR45","doi-asserted-by":"crossref","unstructured":"Justesen, N. & Risi, S. Learning macromanagement in StarCraft from replays using deep learning. IEEE Conf. Computational Intelligence and Games (CIG) 162\u2013169 (2017).","DOI":"10.1109\/CIG.2017.8080430"},{"key":"1724_CR46","first-page":"10738","volume":"31","author":"G Synnaeve","year":"2018","unstructured":"Synnaeve, G. et al. Forward modeling for partial observation strategy games\u2014a StarCraft defogger. Adv. Neural Information Process. Syst. 31, 10738\u201310748 (2018).","journal-title":"Adv. Neural Information Process. Syst."},{"key":"1724_CR47","doi-asserted-by":"publisher","first-page":"102","DOI":"10.1609\/aimag.v37i2.2657","volume":"37","author":"SS Farooq","year":"2016","unstructured":"Farooq, S. S., Oh, I.-S., Kim, M.-J. & Kim, K. J. StarCraft AI competition report. AI Mag. 37, 102\u2013107 (2016).","journal-title":"AI Mag."},{"key":"1724_CR48","unstructured":"Sun, P. et al. TStarBots: defeating the cheating level builtin AI in StarCraft II in the full game. Preprint at https:\/\/arxiv.org\/abs\/1809.07193v3 (2018)."},{"key":"1724_CR49","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A. & Klimov, O. Proximal policy optimization algorithms. Preprint at https:\/\/arxiv.org\/abs\/1707.06347v2 (2017)."},{"key":"1724_CR50","unstructured":"Ibarz, B. et al. Reward learning from human preferences and demonstrations in Atari. Adv. Neural Information Process. Syst. 31, 8011\u20138023 (2018)."},{"key":"1724_CR51","doi-asserted-by":"crossref","unstructured":"Nair, A., McGrew, B., Andrychowicz, M., Zaremba, W. & Abbeel, P. Overcoming exploration in reinforcement learning with demonstrations. IEEE Intl Conf. Robotics and Automation 6292\u20136299 (2018).","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"1724_CR52","unstructured":"Christiano, P. F. et al. Deep reinforcement learning from human preferences. Adv. Neural Information Process. Syst. 30, 4299\u20134307 (2017)."},{"key":"1724_CR53","unstructured":"Lanctot, M. et al. A unified game-theoretic approach to multiagent reinforcement learning. Adv. Neural Information Process. Syst. 30, 4190\u20134203 (2017)."},{"key":"1724_CR54","doi-asserted-by":"crossref","unstructured":"Perez, E., Strub, F., De Vries, H., Dumoulin, V. & Courville, A. 
FiLM: visual reasoning with a general conditioning layer. Preprint at https:\/\/arxiv.org\/abs\/1709.07871v2 (2018).","DOI":"10.1609\/aaai.v32i1.11671"},{"key":"1724_CR55","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S. & Sun, J. Deep residual learning for image recognition. Proc. IEEE Conf. Computer Vision and Pattern Recognition 770\u2013778 (2016).","DOI":"10.1109\/CVPR.2016.90"},{"key":"1724_CR56","unstructured":"Hinton, G., Vinyals, O. & Dean, J. Distilling the knowledge in a neural network. Preprint at https:\/\/arxiv.org\/abs\/1503.02531v1 (2015)."},{"key":"1724_CR57","unstructured":"Kingma, D. P. & Ba, J. Adam: a method for stochastic optimization. Preprint at https:\/\/arxiv.org\/abs\/1412.6980v9 (2014)."},{"key":"1724_CR58","unstructured":"Bishop, C. M. Pattern Recognition and Machine Learning (Springer, 2006)."},{"key":"1724_CR59","unstructured":"Rusu, A. A. et al. Policy distillation. Preprint at https:\/\/arxiv.org\/abs\/1511.06295 (2016)."},{"key":"1724_CR60","unstructured":"Parisotto, E., Ba, J. & Salakhutdinov, R. Actor-mimic: deep multitask and transfer reinforcement learning. Preprint at https:\/\/arxiv.org\/abs\/1511.06342 (2016)."},{"key":"1724_CR61","unstructured":"Precup, D., Sutton, R. S. & Singh, S. P. Eligibility traces for off-policy policy evaluation. ICML \u201900 Proc. 17th Intl Conf. Machine Learning 759\u2013766 (2000)."},{"key":"1724_CR62","unstructured":"DeepMind Research on Ladder. https:\/\/starcraft2.com\/en-us\/news\/22933138 (2019)."},{"key":"1724_CR63","unstructured":"Vinyals, O. et al. AlphaStar: mastering the real-time strategy game StarCraft II https:\/\/deepmind.com\/blog\/article\/alphastar-mastering-real-time-strategy-game-starcraft-ii (DeepMind, 2019)."}],"container-title":["Nature"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41586-019-1724-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41586-019-1724-z","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41586-019-1724-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T09:04:19Z","timestamp":1732698259000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41586-019-1724-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,10,30]]},"references-count":63,"journal-issue":{"issue":"7782","published-print":{"date-parts":[[2019,11,14]]}},"alternative-id":["1724"],"URL":"https:\/\/doi.org\/10.1038\/s41586-019-1724-z","relation":{},"ISSN":["0028-0836","1476-4687"],"issn-type":[{"type":"print","value":"0028-0836"},{"type":"electronic","value":"1476-4687"}],"subject":[],"published":{"date-parts":[[2019,10,30]]},"assertion":[{"value":"30 August 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 October 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 October 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"M.J., W.M.C., O.V., and D.S. have filed provisional patent application 62\/796,567 about the contents of this manuscript. 
The remaining authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}