{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T02:25:37Z","timestamp":1740104737295,"version":"3.37.3"},"reference-count":45,"publisher":"Wiley","issue":"11","license":[{"start":{"date-parts":[[2022,7,4]],"date-time":"2022-07-04T00:00:00Z","timestamp":1656892800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/onlinelibrary.wiley.com\/termsAndConditions#vor"},{"start":{"date-parts":[[2022,7,4]],"date-time":"2022-07-04T00:00:00Z","timestamp":1656892800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/doi.wiley.com\/10.1002\/tdm_license_1.1"}],"content-domain":{"domain":["onlinelibrary.wiley.com"],"crossmark-restriction":true},"short-container-title":["Int J of Intelligent Sys"],"published-print":{"date-parts":[[2022,11]]},"DOI":"10.1002\/int.22945","type":"journal-article","created":{"date-parts":[[2022,7,4]],"date-time":"2022-07-04T12:05:21Z","timestamp":1656936321000},"page":"8387-8411","update-policy":"https:\/\/doi.org\/10.1002\/crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Multiagent reinforcement learning for strictly constrained tasks based on Reward Recorder"],"prefix":"10.1155","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1860-9992","authenticated-orcid":false,"given":"Lifu","family":"Ding","sequence":"first","affiliation":[{"name":"College of Electrical Engineering Zhejiang University Hangzhou Zhejiang China"}]},{"given":"Gangfeng","family":"Yan","sequence":"additional","affiliation":[{"name":"College of Electrical Engineering Zhejiang University Hangzhou Zhejiang China"}]},{"given":"Jianing","family":"Liu","sequence":"additional","affiliation":[{"name":"Guangdong Power Grid Power Dispatch Control Center Guangdong Power Grid Corporation Guangzhou Guangdong China"}]}],"member":"311","published-online":{"date-parts":[[2022,7,4]]},"reference":[{"key":"e_1_2_11_2_1","first-page":"370","volume-title":"Distributed Deep Reinforcement Learning: Learn How to Play Atari Games in 21 Minutes","author":"Adamski I","year":"2018"},{"key":"e_1_2_11_3_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1008942012299"},{"key":"e_1_2_11_4_1","first-page":"441","volume-title":"Game Theory and Multi\u2010agent Reinforcement Learning","author":"Now\u00e9 A","year":"2012"},{"key":"e_1_2_11_5_1","first-page":"126","article-title":"Online reinforcement learning multiplayer non\u2010zero sum games of continuous\u2010time Markov jump linear systems","volume":"412","author":"Xin X","year":"2022","journal-title":"Appl Math Comput"},{"key":"e_1_2_11_6_1","first-page":"330","volume-title":"Multi\u2010Agent Reinforcement Learning: Independent vs. Cooperative Agents","author":"Tan M.","year":"1993"},{"key":"e_1_2_11_7_1","first-page":"5887","volume-title":"Qtran: Learning to Factorize with Transformation for Cooperative Multi\u2010agent Reinforcement Learning","author":"Son K","year":"2019"},{"key":"e_1_2_11_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.05.097"},{"key":"e_1_2_11_9_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"e_1_2_11_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2020.3033782"},{"key":"e_1_2_11_11_1","unstructured":"Shalev\u2010ShwartzS ShammahS ShashuaA. Safe multi\u2010agent reinforcement learning for autonomous driving.2016.arXiv:1610.03295."},{"key":"e_1_2_11_12_1","doi-asserted-by":"publisher","DOI":"10.1111\/mice.12702"},{"key":"e_1_2_11_13_1","doi-asserted-by":"publisher","DOI":"10.1002\/9781119562306.ch21"},{"key":"e_1_2_11_14_1","unstructured":"RozadaS ApostolopoulouD AlonsoE.Deep multi\u2010agent reinforcement learning for cost efficient distributed load frequency control.2020.arXiv:2010.06293."},{"key":"e_1_2_11_15_1","doi-asserted-by":"publisher","DOI":"10.1002\/er.4559"},{"key":"e_1_2_11_16_1","doi-asserted-by":"publisher","DOI":"10.1049\/cmu2.12177"},{"key":"e_1_2_11_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TR.2017.2765352"},{"key":"e_1_2_11_18_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TCYB.2021.3112699","article-title":"Asynchronous fault detection observer for 2\u2010D Markov jump systems","volume":"1","author":"Cheng P","year":"2021","journal-title":"IEEE Trans Cybern"},{"key":"e_1_2_11_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TFUZZ.2021.3136359"},{"key":"e_1_2_11_20_1","doi-asserted-by":"publisher","DOI":"10.1002\/rnc.5994"},{"key":"e_1_2_11_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2021.3110412"},{"key":"e_1_2_11_22_1","unstructured":"SunehagP LeverG GruslysA et al.Value\u2010decomposition networks for cooperative multi\u2010agent learning.2017;arXiv:1706.05296."},{"key":"e_1_2_11_23_1","unstructured":"RashidT SamvelyanM WittdCS FarquharG FoersterJN WhitesonS.QMIX: monotonic value function factorisation for deep multi\u2010agent reinforcement learning.2018;arXiv:1803.11485."},{"key":"e_1_2_11_24_1","unstructured":"LoweR WuY TamarA HarbJ AbbeelP MordatchI.Multi\u2010agent actor\u2013critic for mixed cooperative\u2010competitive environments.2017;arXiv:1706.02275."},{"key":"e_1_2_11_25_1","first-page":"535","volume-title":"An Algorithm for Distributed Reinforcement Learning in Cooperative Multi\u2010Agent Systems","author":"Lauer M","year":"2000"},{"key":"e_1_2_11_26_1","unstructured":"VecerikM HesterT ScholzJ et al.Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards.2017;arXiv:1707.08817."},{"key":"e_1_2_11_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.3034424"},{"key":"e_1_2_11_28_1","doi-asserted-by":"publisher","DOI":"10.1002\/int.22648"},{"key":"e_1_2_11_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2015.2416927"},{"key":"e_1_2_11_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSII.2018.2842085"},{"key":"e_1_2_11_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPWRS.2013.2271640"},{"key":"e_1_2_11_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2894305"},{"key":"e_1_2_11_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.2994468"},{"key":"e_1_2_11_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2020.2998766"},{"key":"e_1_2_11_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSYST.2020.3007795"},{"key":"e_1_2_11_36_1","first-page":"51","volume-title":"Temporal Difference Learning, SARSA, and Q\u2010Learning","author":"Sewak M.","year":"2019"},{"key":"e_1_2_11_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2014.2363096"},{"key":"e_1_2_11_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2019.2933443"},{"key":"e_1_2_11_39_1","doi-asserted-by":"publisher","DOI":"10.1007\/s12532-008-0001-1"},{"key":"e_1_2_11_40_1","first-page":"2","volume-title":"Introduction to nloptr: an R Interface to NLopt","author":"Ypma J.","year":"2014"},{"key":"e_1_2_11_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPWRS.2006.889132"},{"key":"e_1_2_11_42_1","first-page":"157","volume-title":"Markov Games as a Framework for Multi\u2010Agent Reinforcement Learning","author":"Littman ML","year":"1994"},{"key":"e_1_2_11_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPWRS.2018.2889989"},{"issue":"9","key":"e_1_2_11_44_1","first-page":"4146","article-title":"Distributed Q\u2010learning\u2010based online optimization algorithm for unit commitment and dispatch in smart grid","volume":"50","author":"Li F","year":"2019","journal-title":"IEEE Trans Syst Man Cybern"},{"key":"e_1_2_11_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2017.2669896"},{"key":"e_1_2_11_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2018.2852602"}],"container-title":["International Journal of Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/onlinelibrary.wiley.com\/doi\/pdf\/10.1002\/int.22945","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/onlinelibrary.wiley.com\/doi\/full-xml\/10.1002\/int.22945","content-type":"application\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/onlinelibrary.wiley.com\/doi\/pdf\/10.1002\/int.22945","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,9]],"date-time":"2023-02-09T15:32:40Z","timestamp":1675956760000},"score":1,"resource":{"primary":{"URL":"https:\/\/onlinelibrary.wiley.com\/doi\/10.1002\/int.22945"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,4]]},"references-count":45,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2022,11]]}},"alternative-id":["10.1002\/int.22945"],"URL":"https:\/\/doi.org\/10.1002\/int.22945","archive":["Portico"],"relation":{},"ISSN":["0884-8173","1098-111X"],"issn-type":[{"type":"print","value":"0884-8173"},{"type":"electronic","value":"1098-111X"}],"subject":[],"published":{"date-parts":[[2022,7,4]]},"assertion":[{"value":"2022-03-31","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2022-06-05","order":1,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2022-07-04","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}