{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,8,15]],"date-time":"2024-08-15T14:33:27Z","timestamp":1723732407632},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"15","license":[{"start":{"date-parts":[[2021,3,3]],"date-time":"2021-03-03T00:00:00Z","timestamp":1614729600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,3,3]],"date-time":"2021-03-03T00:00:00Z","timestamp":1614729600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"The National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["61772355","61702055","61472262"],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"The Natural Science Research University Major Projects of Jiangsu Province","award":["18KJA520011","17KJA520004"]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput & Applic"],"published-print":{"date-parts":[[2021,8]]},"DOI":"10.1007\/s00521-021-05738-9","type":"journal-article","created":{"date-parts":[[2021,3,3]],"date-time":"2021-03-03T08:09:43Z","timestamp":1614758983000},"page":"9723-9732","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Self-guided deep deterministic policy gradient with multi-actor"],"prefix":"10.1007","volume":"33","author":[{"given":"Hongming","family":"Chen","sequence":"first","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-8710-1810","authenticated-orcid":false,"given":"Quan","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Shan","family":"Zhong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,3,3]]},"reference":[{"key":"5738_CR1","doi-asserted-by":"publisher","first-page":"421","DOI":"10.1177\/0278364917710318","volume":"37","author":"S Levine","year":"2018","unstructured":"Levine S, Pastor P, Krizhevsky A, Ibarz J, Quillen D (2018) Learning hand-eye coordination for robotic grasping with deep learning and large-scale data collection. Int J Robot Res 37:421\u2013436","journal-title":"Int J Robot Res"},{"key":"5738_CR2","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1007\/s00521-013-1455-2","volume":"25","author":"T Li","year":"2014","unstructured":"Li T, Liu YJ, Tong SC (2014) Adaptive neural control using reinforcement learning for a class of robot manipulator. Neural Comput Appl 25:135\u2013141","journal-title":"Neural Comput Appl"},{"key":"5738_CR3","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D et al (2016) Mastering the game of go with deep neural networks and tree search. Nature 529:484\u2013489","journal-title":"Nature"},{"key":"5738_CR4","unstructured":"Jiang ZY, Xu DX, Liang JJ (2017) A deep reinforcement learning framework for the financial portfolio management problem. arXiv preprint arXiv:1706.10059"},{"key":"5738_CR5","doi-asserted-by":"publisher","first-page":"32400","DOI":"10.1109\/ACCESS.2019.2901300","volume":"7","author":"MX Jiang","year":"2019","unstructured":"Jiang MX, Hai T, Pan ZG, Wang HY, Jia YJ, Deng C (2019) Multi-agent deep reinforcement learning for multi-object tracker. 
IEEE Access 7:32400\u201332407","journal-title":"IEEE Access"},{"key":"5738_CR6","doi-asserted-by":"publisher","unstructured":"Han JW, Yang L, Zhang DW, Chang XJ, Liang XD (2018) Reinforcement cutting-agent learning for video object segmentation. In: 2018 IEEE conference on computer vision and pattern recognition, pp 9080\u20139089.https:\/\/doi.org\/10.1109\/CVPR.2018.00946","DOI":"10.1109\/CVPR.2018.00946"},{"key":"5738_CR7","unstructured":"Ganin Y, Kulkarni T, Babuschkin I, Eslami SMA, Vinyals O (2018) Synthesizing programs for images using reinforced adversarial learning. In: Proceedings of the 35th international conference on machine learning, pp 1652\u20131661"},{"key":"5738_CR8","doi-asserted-by":"publisher","unstructured":"Li JW, Monroe W, Ritter A, Jurafsky D, Galley M, Gao JF (2016) Deep reinforcement learning for dialogue generation. In: Proceedings of the 2016 conference on empirical methods in natural language processing, pp 1192\u20131202. https:\/\/doi.org\/10.18653\/v1\/d16-1127","DOI":"10.18653\/v1\/d16-1127"},{"key":"5738_CR9","doi-asserted-by":"publisher","unstructured":"Yin QY, Zhang Y, Zhang WN, Liu T, Wang WY (2018) Deep reinforcement learning for Chinese zero pronoun resolution. In: Proceedings of the 56th annual meeting of the association for computational linguistics, pp 569\u2013578. https:\/\/doi.org\/10.18653\/v1\/P18-1053","DOI":"10.18653\/v1\/P18-1053"},{"key":"5738_CR10","doi-asserted-by":"crossref","unstructured":"Feng J, Huang ML, Zhao L, Yang Y, Zhu XY (2018) Reinforcement learning for relation classification from noisy data. In: Proceedings of the thirty-second AAAI conference on artificial intelligence, pp 5779\u20135786","DOI":"10.1609\/aaai.v32i1.12063"},{"key":"5738_CR11","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: an introduction. MIT Press, Cambridge"},{"key":"5738_CR12","first-page":"279","volume":"8","author":"C Watkins","year":"1992","unstructured":"Watkins C, Christopher J, Dayan P (1992) Q-learning. Mach Learn 8:279\u2013292","journal-title":"Mach Learn"},{"key":"5738_CR13","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V et al (2015) Human-level control through deep reinforcement learning. Nature 518:529\u2013533. https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"key":"5738_CR14","unstructured":"Wang ZY, Schaul S, Hessel M, Hasselt HV, Lanctot M, Freitas ND (2016) Dueling network architectures for deep reinforcement learning. In: Proceedings of the 33rd international conference on machine learning, pp 1995\u20132003"},{"key":"5738_CR15","doi-asserted-by":"crossref","unstructured":"Hasselt HV, Gueza A, Silver D (2016) Deep reinforcement learning with double Q-learning. In: Proceedings of the thirtieth AAAI conference on artificial intelligence, pp 2094\u20132100","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"5738_CR16","unstructured":"Hausknecht MJ, Stone P (2015) Deep recurrent Q-learning for partially observable MDPs. In: Proceedings of the 2015 AAAI conference on artificial intelligence, pp 29\u201337"},{"key":"5738_CR17","unstructured":"Konda VR, Tsitsiklis JN (2000) Actor-critic algorithms. 
In: Proceedings of the 12nd neural information processing system, pp 1008\u20131014"},{"key":"5738_CR18","first-page":"A187","volume":"8","author":"TP Lillicrap","year":"2015","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2015) Continuous control with deep reinforcement learning. Comput Sci 8:A187","journal-title":"Comput Sci"},{"key":"5738_CR19","unstructured":"Mnih V, Badia AP, Mirza M, Graves A, Harley T, Lillicrap TP, Silver D, Kavukcuoglu K (2016) Asynchronous methods for deep reinforcement learning. In: Proceedings of 33rd international conference of machine learning, pp 1928\u20131937"},{"key":"5738_CR20","unstructured":"Silver D, Lever G, Heess N, Degris T, Wierstra D, Riedmiller MA (2014) Deterministic policy gradient algorithms. In: Proceedings of 31st international conference of machine learning, pp 387\u2013395"},{"key":"5738_CR21","unstructured":"Tangkaratt V, Abdolmaleki A, Sugiyama M (2018) Guide actor-critic for continuous control. In: 6th international conference on learning representations, pp 427\u2013438"},{"key":"5738_CR22","unstructured":"Sutton RS, McAllester D, Singh S, Mansour Y (2000) Policy gradient methods for reinforcement learning with function approximation. In: Proceedings of the 12nd neural information processing system, pp 1057\u20131063"},{"key":"5738_CR23","first-page":"91","volume":"5","author":"P Wawrzynski","year":"2015","unstructured":"Wawrzynski P (2015) Control policy with autocorrelated noise in reinforcement learning for robotics. Mach Learn 5:91\u201395","journal-title":"Mach Learn"},{"key":"5738_CR24","unstructured":"Goodfellow IJ, Abadie JP, Mirza M, Xu B, Farley DW, Ozair S, Courville A, Bengio Y (2014) Generative adversarial nets. In: Proceedings of the 27th neural information processing system, pp 2672\u20132680"},{"key":"5738_CR25","doi-asserted-by":"publisher","first-page":"823","DOI":"10.1103\/PhysRev.36.823","volume":"36","author":"GE Uhlenbeck","year":"1930","unstructured":"Uhlenbeck GE, Ornstein LS (1930) On the theory of Brownian motion. Phys Rev 36:823\u2013841","journal-title":"Phys Rev"},{"key":"5738_CR26","unstructured":"Pfau D, Vinyals O (2016) Connecting generative adversarial networks and actor-critic methods. arXiv preprint arXiv:1610.01945"},{"key":"5738_CR27","unstructured":"Lowe R, Yi W, Aviv T, Jean H, Pieter A, Igor M (2017) Multi-agent actor-critic for mixed cooperative-competitive environments. In: Proceedings of the 30th neural information processing system, pp 6382\u20136393"},{"key":"5738_CR28","unstructured":"Ho J, Ermon S (2016) Generative adversarial imitation learning. In: Proceedings of the 29th neural information processing system, pp 4565\u20134573"},{"key":"5738_CR29","doi-asserted-by":"crossref","unstructured":"Wu L, Li Z, Tao Q, Lai J, Liu T-Y (2017) Sequence prediction with unlabeled data by reward function learning. In: IJCAI, pp 3098\u20133104","DOI":"10.24963\/ijcai.2017\/432"},{"issue":"3","key":"5738_CR30","doi-asserted-by":"publisher","first-page":"230","DOI":"10.1109\/TAMD.2010.2056368","volume":"2","author":"J Schmidhuber","year":"2010","unstructured":"Schmidhuber J (2010) Formal theory of creativity, fun, and intrinsic motivation (1990\u20132010). IEEE Trans Auton Ment Dev 2(3):230\u2013247","journal-title":"IEEE Trans Auton Ment Dev"},{"key":"5738_CR31","unstructured":"Burda Y, Edwards H, Pathak D, Storkey AJ, Darrell T, Efros AA (2019) Large-scale study of curiosity-driven learning. 
In: 7th international conference on learning representations, New Orleans, LA, USA, May 6\u20139"},{"key":"5738_CR32","unstructured":"Brockman G, Cheung V, Pettersson L, Schneider J, Schulman J, Tang J, Zaremba W (2016) OpenAI gym. arXiv preprint arXiv:1606.01540"},{"key":"5738_CR33","doi-asserted-by":"crossref","unstructured":"Todorov E, Erez T, Tassa Y (2012) MuJoCo: a physics engine for model-based control. In: Proceedings of 2012 IEEE international conference intelligent robots systems, pp 5026\u20135033","DOI":"10.1109\/IROS.2012.6386109"},{"key":"5738_CR34","unstructured":"Schulman J, Levine S, Abbeel P (2015) Trust region policy optimization. In: Proceedings of 32nd international conference of machine learning, pp 1889\u20131897"},{"key":"5738_CR35","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347"},{"key":"5738_CR36","unstructured":"Dhariwa P et al (2017) OpenAI baselines. Github. https:\/\/github.com\/openai\/baselines"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-021-05738-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-021-05738-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-021-05738-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,19]],"date-time":"2022-12-19T21:16:53Z","timestamp":1671484613000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-021-05738-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,3,3]]},"references-count":36,"journal-issue":{"issue":"15","published-print":{"date-parts":[[2021,8]]}},"alternative-id":["5738"],"URL":"https:\/\/doi.org\/10.1007\/s00521-021-05738-9","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,3,3]]},"assertion":[{"value":"7 January 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 January 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 March 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical standards"}},{"value":"We declare that we do not have any commercial or associative interest that represents a conflict of interest in connection with the work submitted.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}