{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T03:17:54Z","timestamp":1730344674875,"version":"3.28.0"},"reference-count":25,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,6]]},"DOI":"10.23919\/wac.2018.8430409","type":"proceedings-article","created":{"date-parts":[[2018,8,9]],"date-time":"2018-08-09T22:09:52Z","timestamp":1533852592000},"page":"1-6","source":"Crossref","is-referenced-by-count":8,"title":["Multi-Agent Exploration for Faster and Reliable Deep Q-Learning Convergence in Reinforcement Learning"],"prefix":"10.23919","author":[{"given":"Abhijit","family":"Majumdar","sequence":"first","affiliation":[]},{"given":"Patrick","family":"Benavidez","sequence":"additional","affiliation":[]},{"given":"Mo","family":"Jamshidi","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"487","article-title":"Readings in agents","author":"tan","year":"1998","journal-title":"Multi-agent Reinforcement Learning Independent Vs Cooperative Agents"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2007.4399095"},{"key":"ref12","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume":"abs 1706 2275","author":"lowe","year":"2017","journal-title":"CoRR"},{"key":"ref13","article-title":"Multiagent cooperation and competition with deep reinforcement learning","volume":"abs 1511 8779","author":"tampuu","year":"2015","journal-title":"CoRR"},{"key":"ref14","article-title":"AWESOME: A general multiagent learning algorithm that converges in self-play and learns a best response against stationary opponents","volume":"cs gt 307002","author":"conitzer","year":"2003","journal-title":"CoRR"},{"key":"ref15","first-page":"871","article-title":"Extending q-learning to general adaptive multi-agent systems","author":"tesauro","year":"2004","journal-title":"Advances in Neural Information Processing Systems 16"},{"key":"ref16","article-title":"Learning to communicate with deep multi-agent reinforcement learning","volume":"abs 1605 6676","author":"foerster","year":"2016","journal-title":"CoRR"},{"key":"ref17","first-page":"171","article-title":"A centralized reinforcement learning method for multiagent job scheduling in grid","author":"moradi","year":"0","journal-title":"2016 6th International Conference on Computer and Knowledge Engineering"},{"key":"ref18","first-page":"1","article-title":"A centralized reinforcement learning approach for proactive scheduling in manufacturing","author":"qu","year":"0","journal-title":"20th IEEE International Conference on Emerging Technologies and Factory Automation 2015"},{"key":"ref19","article-title":"Multi-agent reinforcement learning in sequential social dilemmas","volume":"abs 1702 3037","author":"leibo","year":"2017","journal-title":"CoRR"},{"key":"ref4","article-title":"Prioritized experience replay","volume":"abs 1511 5952","author":"schaul","year":"2015","journal-title":"CoRR"},{"key":"ref3","article-title":"Deep reinforcement learning with double q-learning","volume":"abs 1509 6461","author":"van hasselt","year":"2015","journal-title":"CoRR"},{"key":"ref6","article-title":"Deep recurrent q-learning for partially observable mdps","volume":"abs 1507 6527","author":"hausknecht","year":"2015","journal-title":"CoRR"},{"key":"ref5","article-title":"Dueling network architectures for deep 
reinforcement learning","volume":"abs 1511 6581","author":"wang","year":"2015","journal-title":"CoRR"},{"journal-title":"Introduction to Reinforcement Learning","year":"1998","author":"sutton","key":"ref8"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"3521","DOI":"10.1073\/pnas.1611835114","article-title":"Overcoming catastrophic forgetting in neural networks","volume":"114","author":"kirkpatrick","year":"0","journal-title":"Proceedings of the National Academy of Sciences"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref9","article-title":"Deep decentralized multi-task multi-agent reinforcement learning under partial observability","volume":"abs 1703 6182","author":"omidshafiei","year":"2017","journal-title":"CoRR"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref20","article-title":"Stabilising experience replay for deep multi-agent reinforcement learning","volume":"abs 1702 8887","author":"foerster","year":"2017","journal-title":"CoRR"},{"key":"ref22","article-title":"Sample efficient actor-critic with experience replay","volume":"abs 1611 1224","author":"wang","year":"2016","journal-title":"CoRR"},{"key":"ref21","article-title":"Hindsight experience replay","volume":"abs 1707 1495","author":"andrychowicz","year":"2017","journal-title":"CoRR"},{"key":"ref24","article-title":"Practical recommendations for gradient-based training of deep architectures","volume":"abs 1206 5533","author":"bengio","year":"2012","journal-title":"CoRR"},{"key":"ref23","first-page":"503","article-title":"Tree-based batch mode reinforcement learning","volume":"6","author":"ernst","year":"2005","journal-title":"J Mach Learn Res"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.23919\/WAC.2018.8430473"}],"event":{"name":"2018 World Automation Congress (WAC)","start":{"date-parts":[[2018,6,3]]},"location":"Stevenson, WA","end":{"date-parts":[[2018,6,6]]}},"container-title":["2018 World Automation Congress (WAC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8410978\/8430291\/08430409.pdf?arnumber=8430409","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,26]],"date-time":"2022-01-26T15:54:45Z","timestamp":1643212485000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8430409\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,6]]},"references-count":25,"URL":"https:\/\/doi.org\/10.23919\/wac.2018.8430409","relation":{},"subject":[],"published":{"date-parts":[[2018,6]]}}}
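
A minimal sketch of how a record like the one above can be retrieved and summarized from the public Crossref REST API works endpoint (https://api.crossref.org/works/{DOI}). The `requests` dependency and the summary formatting are assumptions for illustration; the field names (message.title, message.author, container-title, issued, references-count, is-referenced-by-count) are taken directly from the record shown here.

# Sketch: fetch and summarize this Crossref work record (assumes `requests` is installed).
import requests

DOI = "10.23919/wac.2018.8430409"

resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
resp.raise_for_status()
msg = resp.json()["message"]

# These fields all appear in the record above.
title = msg["title"][0]
authors = ", ".join(
    f'{a.get("given", "")} {a["family"]}'.strip() for a in msg.get("author", [])
)
venue = msg["container-title"][0] if msg.get("container-title") else ""
year = msg["issued"]["date-parts"][0][0]

print(f'{authors}. "{title}". {venue}, {year}.')
print(f'References deposited: {msg.get("references-count", 0)}')
print(f'Cited by: {msg.get("is-referenced-by-count", 0)}')

For the record above, this would print the author list, the paper title, the venue "2018 World Automation Congress (WAC)", the year 2018, the 25 deposited references, and the citation count of 8.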