{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,25]],"date-time":"2024-10-25T04:22:41Z","timestamp":1729830161821,"version":"3.28.0"},"reference-count":31,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["N00014-21-1-2385"],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100023530","name":"Army Contracting Command - Aberdeen Proving Ground","doi-asserted-by":"publisher","award":["W911NF-22-1-0151"],"id":[{"id":"10.13039\/100023530","id-type":"DOI","asserted-by":"publisher"}]},{"name":"U.S. ARO","award":["W911NF2120064"]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-2328395","CMMI-2038625"],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Control Syst. Lett."],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/lcsys.2024.3410149","type":"journal-article","created":{"date-parts":[[2024,6,5]],"date-time":"2024-06-05T18:08:46Z","timestamp":1717610926000},"page":"1217-1222","source":"Crossref","is-referenced-by-count":1,"title":["Linear Convergence of Independent Natural Policy Gradient in Games With Entropy Regularization"],"prefix":"10.1109","volume":"8","author":[{"ORCID":"http:\/\/orcid.org\/0000-0003-2494-8552","authenticated-orcid":false,"given":"Youbang","family":"Sun","sequence":"first","affiliation":[{"name":"Department of Mechanical and Industrial Engineering, Northeastern University, Boston, MA, USA"}]},{"ORCID":"http:\/\/orcid.org\/0000-0001-7879-5315","authenticated-orcid":false,"given":"Tao","family":"Liu","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, Texas A&M University, College Station, TX, USA"}]},{"ORCID":"http:\/\/orcid.org\/0000-0003-0389-5367","authenticated-orcid":false,"given":"P. 
R.","family":"Kumar","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, Texas A&M University, College Station, TX, USA"}]},{"ORCID":"http:\/\/orcid.org\/0000-0003-3093-8510","authenticated-orcid":false,"given":"Shahin","family":"Shahrampour","sequence":"additional","affiliation":[{"name":"Department of Mechanical and Industrial Engineering, Northeastern University, Boston, MA, USA"}]}],"member":"263","reference":[{"issue":"1","key":"ref1","first-page":"4431","article-title":"On the theory of policy gradient methods: Optimality, approximation, and distribution shift","volume":"22","author":"Agarwal","year":"2021","journal-title":"J. Mach. Learn. Res."},{"key":"ref2","first-page":"1","article-title":"Multi-agent learning in network zero-sum games is a Hamiltonian system","volume":"10","author":"Bailey","year":"2019","journal-title":"Momentum"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-61045-6_11"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2021.0014"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CDC51059.2022.9993175"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2021.2151"},{"key":"ref7","first-page":"1","article-title":"Faster last-iterate convergence of policy optimization in zero-sum Markov games","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Cen"},{"key":"ref8","first-page":"27952","article-title":"Fast policy extragradient methods for competitive games with entropy regularization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Cen"},{"key":"ref9","first-page":"4699","article-title":"Provable policy gradient methods for average-reward markov potential games","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Cheng"},{"key":"ref10","first-page":"5527","article-title":"Independent policy gradient methods for competitive reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Daskalakis"},{"key":"ref11","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref12","first-page":"16829","article-title":"An adaptive entropy-regularization framework for multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kim"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-022-01816-5"},{"key":"ref14","first-page":"7624","article-title":"An improved analysis of (variance-reduced) policy gradient and natural policy gradient methods","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Liu"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1006\/game.1995.1023"},{"key":"ref16","first-page":"6820","article-title":"On the global convergence rates of softmax policy gradient methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Mei"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/s41884-023-00106-z"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.36.1.48"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-05816-6_3"},{"key":"ref20","first-page":"1","article-title":"Competitive gradient descent","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","volume":"32","author":"Sch\u00e4fer"},{"key":"ref21","article-title":"Safe, multi-agent, reinforcement learning for autonomous driving","author":"Shalev-Shwartz","year":"2016","journal-title":"arXiv:1610.03295"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1515\/9781400882014-002"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2024.3410149"},{"key":"ref24","first-page":"1","article-title":"Provably fast convergence of independent natural policy gradient for Markov potential games","volume-title":"Proc. 37th Adv. Neural Inf. Process. Syst.","volume":"36","author":"Sun"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.3900237"},{"key":"ref26","first-page":"4259","article-title":"Last-iterate convergence of decentralized optimistic gradient descent\/ascent in infinite-horizon competitive Markov games","volume-title":"Proc. Conf. Learn. Theory","author":"Wei"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1137\/21M1456789"},{"key":"ref28","first-page":"1923","article-title":"On the global convergence rates of decentralized softmax gradient play in Markov potential games","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Zhang"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/tac.2024.3387208"},{"key":"ref30","first-page":"1","article-title":"Natural actor-critic for robust reinforcement learning with function approximation","volume-title":"Proc. 37th Adv. Neural Inf. Process. Syst.","author":"Zhou"},{"key":"ref31","first-page":"1","article-title":"Anchor-changing regularized natural policy gradient for multi-objective reinforcement learning","volume-title":"Proc. 36th Adv. Neural Inf. Process. Syst.","author":"Zhou"}],"container-title":["IEEE Control Systems Letters"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/7782633\/10411713\/10549978-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7782633\/10411713\/10549978.pdf?arnumber=10549978","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,24]],"date-time":"2024-10-24T17:40:07Z","timestamp":1729791607000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10549978\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/lcsys.2024.3410149","relation":{},"ISSN":["2475-1456"],"issn-type":[{"type":"electronic","value":"2475-1456"}],"subject":[],"published":{"date-parts":[[2024]]}}}