{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T13:40:55Z","timestamp":1730209255173,"version":"3.28.0"},"reference-count":41,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,8,21]],"date-time":"2022-08-21T00:00:00Z","timestamp":1661040000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,8,21]],"date-time":"2022-08-21T00:00:00Z","timestamp":1661040000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012492","name":"Youth Innovation Promotion Association","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012492","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,8,21]]},"DOI":"10.1109\/cog51982.2022.9893710","type":"proceedings-article","created":{"date-parts":[[2022,9,20]],"date-time":"2022-09-20T15:33:31Z","timestamp":1663688011000},"page":"127-134","source":"Crossref","is-referenced-by-count":9,"title":["DouZero+: Improving DouDizhu AI by Opponent Modeling and Coach-guided Learning"],"prefix":"10.1109","author":[{"given":"Youpeng","family":"Zhao","sequence":"first","affiliation":[{"name":"University of Science and Technology of China,Hefei,China"}]},{"given":"Jian","family":"Zhao","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China,Hefei,China"}]},{"given":"Xunhan","family":"Hu","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China,Hefei,China"}]},{"given":"Wengang","family":"Zhou","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China,Institute of Artificial Intelligence Hefei Comprehensive Nation Science Center,Hefei,China"}]},{"given":"Houqiang","family":"Li","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China,Institute of Artificial Intelligence Hefei Comprehensive Nation Science Center,Hefei,China"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CoG47356.2020.9231667"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"journal-title":"arXiv preprint arXiv 1709 04396","article-title":"Learning with opponent-learning awareness","year":"2017","author":"foerster","key":"ref33"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.5220\/0006536300290040"},{"key":"ref31","first-page":"1804","article-title":"Opponent modeling in deep reinforcement learning","author":"he","year":"2016","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2015.7317929"},{"journal-title":"arXiv preprint arXiv 1710 03748","article-title":"Emergent complexity via multi-agent competition","year":"2017","author":"bansal","key":"ref37"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45006-8_15"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1080\/095281398146789"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-31368-4_26"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","article-title":"Grandmaster level in starcraft II using multi-agent reinforcement learning","volume":"575","author":"vinyals","year":"2019","journal-title":"Nature"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CoG47356.2020.9231670"},{"journal-title":"arXiv preprint arXiv 1912 06680","article-title":"Dota 2 with large scale deep reinforcement learning","year":"2019","author":"berner","key":"ref11"},{"journal-title":"arXiv preprint arXiv 2003 13874","article-title":"Suphx: Mastering mahjong with deep reinforcement learning","year":"2020","author":"li","key":"ref12"},{"key":"ref13","article-title":"An introduction to counterfactual regret minimization","volume":"11","author":"neller","year":"2013","journal-title":"Educational Advances in Artificial Intelligence (EAAI)"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"journal-title":"arXiv preprint arXiv 1809 02121","article-title":"Learn what not to learn: Action elimination with deep reinforcement learning","year":"2018","author":"zahavy","key":"ref15"},{"key":"ref16","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref17","first-page":"301","article-title":"Combinatorial q-learning for dou di zhu","volume":"16","author":"you","year":"2020","journal-title":"AAAI Artificial Intelligence and Interactive Digital Entertainment"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/176"},{"journal-title":"arXiv preprint arXiv 2106 01111","article-title":"Douzero: Mastering doudizhu with self-play deep reinforcement learning","year":"2021","author":"zha","key":"ref19"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/1952\/4\/042104"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2018.8490419"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CoG52621.2021.9619078"},{"journal-title":"arXiv preprint arXiv 1603 01840","article-title":"Deep reinforcement learning from self-play in imperfect-information games","year":"2016","author":"heinrich","key":"ref6"},{"journal-title":"arXiv preprint arXiv 1207 1411","article-title":"Bayes’ bluff: Opponent modelling in poker","year":"2012","author":"southey","key":"ref29"},{"key":"ref5","first-page":"576","article-title":"Mastering fighting game using deep reinforcement learning with self-play","author":"kim","year":"2020","journal-title":"IEEE Conference on Games (COG)"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"508","DOI":"10.1126\/science.aam6960","article-title":"Deepstack: Expert-level artificial intelligence in heads-up no-limit poker","volume":"356","author":"morav?\u00edk","year":"2017","journal-title":"Science"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"418","DOI":"10.1126\/science.aao1733","article-title":"Superhuman ai for heads-up no-limit poker: Libratus beats top professionals","volume":"359","author":"brown","year":"2018","journal-title":"Science"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1126\/science.aay2400"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref20"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3197091.3197099"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3063955.3063961"},{"journal-title":"arXiv preprint arXiv 1711 00540","article-title":"A unified game-theoretic approach to multiagent reinforcement learning","year":"2017","author":"lanctot","key":"ref24"},{"key":"ref23","first-page":"1","article-title":"Botzone: A game playing system for artificial intelligence education","author":"zhang","year":"2012","journal-title":"International Conference on Frontiers in Education Computer Science and Computer Engineering (FECS)"},{"journal-title":"arXiv preprint arXiv 1712 01275","article-title":"A deeper look at experience replay","year":"2017","author":"zhang","key":"ref41"},{"journal-title":"arXiv preprint arXiv 2007 13344","article-title":"Combining deep reinforcement learning and search for imperfect-information games","year":"2020","author":"brown","key":"ref26"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6144"}],"event":{"name":"2022 IEEE Conference on Games (CoG)","start":{"date-parts":[[2022,8,21]]},"location":"Beijing, China","end":{"date-parts":[[2022,8,24]]}},"container-title":["2022 IEEE Conference on Games (CoG)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9893561\/9893544\/09893710.pdf?arnumber=9893710","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T16:25:39Z","timestamp":1665419139000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9893710\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,21]]},"references-count":41,"URL":"https:\/\/doi.org\/10.1109\/cog51982.2022.9893710","relation":{},"subject":[],"published":{"date-parts":[[2022,8,21]]}}}