{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T06:04:23Z","timestamp":1740117863365,"version":"3.37.3"},"reference-count":47,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2020,9,1]],"date-time":"2020-09-01T00:00:00Z","timestamp":1598918400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61976132","61170155"],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2020,9]]},"DOI":"10.1016\/j.neucom.2019.06.110","type":"journal-article","created":{"date-parts":[[2020,3,14]],"date-time":"2020-03-14T02:38:14Z","timestamp":1584153494000},"page":"157-168","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":14,"special_numbering":"C","title":["Accelerating deep reinforcement learning model for game strategy"],"prefix":"10.1016","volume":"408","author":[{"given":"Yifan","family":"Li","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7085-8876","authenticated-orcid":false,"given":"Yuchun","family":"Fang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5026-5416","authenticated-orcid":false,"given":"Zahid","family":"Akhtar","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.neucom.2019.06.110_bib0001","unstructured":"V. Mnih, K. Kavukcuoglu, D. Silver, A. Graves, I. Antonoglou, D. Wierstra, M. Riedmiller, Playing Atari with deep reinforcement learning, arXiv:\/1312.5602 (2013)."},{"issue":"9","key":"10.1016\/j.neucom.2019.06.110_bib0002","first-page":"1799","article-title":"Deep learning: yesterday, today, and tomorrow","volume":"50","author":"Yu","year":"2013","journal-title":"J. Comput. Res. Dev."},{"year":"1986","series-title":"Learning representations by back-propagating errors","author":"Rumelhart","key":"10.1016\/j.neucom.2019.06.110_bib0003"},{"key":"10.1016\/j.neucom.2019.06.110_bib0004","series-title":"Advances in Neural Information Processing Systems","first-page":"1097","article-title":"Imagenet classification with deep convolutional neural networks","author":"Krizhevsky","year":"2012"},{"issue":"3","key":"10.1016\/j.neucom.2019.06.110_bib0005","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","article-title":"Imagenet large scale visual recognition challenge","volume":"115","author":"Russakovsky","year":"2015","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.neucom.2019.06.110_bib0006","series-title":"Proceedings of the IEEE conference on Computer Vision and Pattern Recognition","first-page":"1725","article-title":"Large-scale video classification with convolutional neural networks","author":"Karpathy","year":"2014"},{"key":"10.1016\/j.neucom.2019.06.110_bib0007","doi-asserted-by":"crossref","unstructured":"K. Cho, B. Van Merri\u00ebnboer, C. Gulcehre, D. Bahdanau, F. Bougares, H. Schwenk, Y. Bengio, Learning phrase representations using RNN encoder-decoder for statistical machine translation, arXiv:\/1406.1078 (2014).","DOI":"10.3115\/v1\/D14-1179"},{"issue":"9","key":"10.1016\/j.neucom.2019.06.110_bib0008","first-page":"184","article-title":"Similar handwritten chinese character recognition based on deep neural networks with big data","volume":"35","author":"Yang","year":"2014","journal-title":"J. Commun."},{"key":"10.1016\/j.neucom.2019.06.110_bib0009","series-title":"Acoustics Speech Signal Processing (ICASSP), 2013 IEEE International Conference on","first-page":"6645","article-title":"Speech recognition with deep recurrent neural networks","author":"Graves","year":"2013"},{"issue":"9","key":"10.1016\/j.neucom.2019.06.110_bib0010","first-page":"1936","article-title":"A study of speech recognition based on RNN-RBM language model","volume":"51","author":"Li","year":"2014","journal-title":"J. Comput. Res. Dev."},{"issue":"8","key":"10.1016\/j.neucom.2019.06.110_bib0011","first-page":"2806","article-title":"Overview of deep learning","volume":"29","author":"Sun","year":"2012","journal-title":"Jisuanji Yingyong Yanjiu"},{"key":"10.1016\/j.neucom.2019.06.110_bib0012","unstructured":"R.S. Suton, A.G. Barto, Reinforcement learning: An introduction. A bradford book, 2002."},{"key":"10.1016\/j.neucom.2019.06.110_sbref0010","doi-asserted-by":"crossref","DOI":"10.1007\/s40815-018-0545-9","article-title":"Adaptive fuzzy control for non-triangular structural stochastic switched nonlinear systems with full state constraints","author":"Sun","year":"2018","journal-title":"IEEE Transactions on Fuzzy Systems"},{"key":"10.1016\/j.neucom.2019.06.110_sbref0011","doi-asserted-by":"crossref","DOI":"10.1109\/TFUZZ.2019.2895560","article-title":"Observer-based fuzzy adaptive event-triggered control for pure-feedback nonlinear systems with prescribed performance","author":"Qiu","year":"2019","journal-title":"IEEE Transactions on Fuzzy Systems"},{"issue":"11","key":"10.1016\/j.neucom.2019.06.110_bib0015","doi-asserted-by":"crossref","first-page":"1238","DOI":"10.1177\/0278364913495721","article-title":"Reinforcement learning in robotics: a survey","volume":"32","author":"Kober","year":"2013","journal-title":"Int. J. Rob. Res."},{"key":"10.1016\/j.neucom.2019.06.110_bib0016","series-title":"Applications of Neural Networks","first-page":"267","article-title":"Td-gammon: A self-teaching Backgammon program","author":"Tesauro","year":"1995"},{"key":"10.1016\/j.neucom.2019.06.110_bib0017","series-title":"European Conference on Machine Learning","first-page":"282","article-title":"Bandit based Monte-Carlo planning","author":"Kocsis","year":"2006"},{"issue":"3","key":"10.1016\/j.neucom.2019.06.110_bib0018","first-page":"677","article-title":"A novel off policy q (\u03bb) algorithm based on linear function approximation","volume":"37","author":"Qi-Ming","year":"2014","journal-title":"Chin. J. Comput."},{"issue":"8","key":"10.1016\/j.neucom.2019.06.110_bib0019","first-page":"1372","article-title":"Study on an average reward reinforcement learning algorithm","volume":"30","author":"Gao","year":"2007","journal-title":"Chin. J. Comput."},{"issue":"5","key":"10.1016\/j.neucom.2019.06.110_bib0020","first-page":"765","article-title":"A reinforcement learning-based approach to dynamic job-shop scheduling","volume":"31","author":"Wei","year":"2005","journal-title":"Acta Autom. Sin."},{"key":"10.1016\/j.neucom.2019.06.110_bib0021","series-title":"ACM SIGARCH Computer Architecture News","first-page":"39","article-title":"Self-optimizing memory controllers: A reinforcement learning approach","volume":"36","author":"Ipek","year":"2008"},{"issue":"7540","key":"10.1016\/j.neucom.2019.06.110_bib0022","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"Mnih","year":"2015","journal-title":"Nature"},{"key":"10.1016\/j.neucom.2019.06.110_bib0023","series-title":"International Conference on Machine Learning","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"Mnih","year":"2016"},{"issue":"7676","key":"10.1016\/j.neucom.2019.06.110_bib0024","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1038\/nature24270","article-title":"Mastering the game of go without human knowledge","volume":"550","author":"Silver","year":"2017","journal-title":"Nature"},{"issue":"7587","key":"10.1016\/j.neucom.2019.06.110_bib0025","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"Silver","year":"2016","journal-title":"Nature"},{"key":"10.1016\/j.neucom.2019.06.110_bib0026","series-title":"Advances in Neural Information Processing Systems","first-page":"2863","article-title":"Action-conditional video prediction using deep networks in Atari games","author":"Oh","year":"2015"},{"key":"10.1016\/j.neucom.2019.06.110_bib0027","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"2488","article-title":"Active object localization with deep reinforcement learning","author":"Caicedo","year":"2015"},{"key":"10.1016\/j.neucom.2019.06.110_bib0028","unstructured":"T.P. Lillicrap, J.J. Hunt, A. Pritzel, N. Heess, T. Erez, Y. Tassa, D. Silver, D. Wierstra, Continuous control with deep reinforcement learning, arXiv:\/1509.02971 (2015)."},{"key":"10.1016\/j.neucom.2019.06.110_bib0029","series-title":"International Conference on Machine Learning","first-page":"1329","article-title":"Benchmarking deep reinforcement learning for continuous control","author":"Duan","year":"2016"},{"key":"10.1016\/j.neucom.2019.06.110_bib0030","series-title":"International Conference on Machine Learning","first-page":"2829","article-title":"Continuous deep q-learning with model-based acceleration","author":"Gu","year":"2016"},{"key":"10.1016\/j.neucom.2019.06.110_bib0031","unstructured":"S. Hansen, Using deep q-learning to control optimization hyperparameters, arXiv:\/1602.04062 (2016)."},{"key":"10.1016\/j.neucom.2019.06.110_bib0032","series-title":"Advances in Neural Information Processing Systems","first-page":"3981","article-title":"Learning to learn by gradient descent by gradient descent","author":"Andrychowicz","year":"2016"},{"key":"10.1016\/j.neucom.2019.06.110_bib0033","unstructured":"A.G. Howard, M. Zhu, B. Chen, D. Kalenichenko, W. Wang, T. Weyand, M. Andreetto, H. Adam, Mobilenets: efficient convolutional neural networks for mobile vision applications, arXiv:\/1704.04861 (2017)."},{"key":"10.1016\/j.neucom.2019.06.110_bib0034","series-title":"European Conference on Computer Vision","first-page":"525","article-title":"XNOR-net: Imagenet classification using binary convolutional neural networks","author":"Rastegari","year":"2016"},{"key":"10.1016\/j.neucom.2019.06.110_bib0035","series-title":"International Conference on Neural Information Processing","first-page":"303","article-title":"Accelerating spatio-temporal deep reinforcement learning model for game strategy","author":"Li","year":"2018"},{"key":"10.1016\/j.neucom.2019.06.110_bib0036","unstructured":"O. Vinyals, T. Ewalds, S. Bartunov, P. Georgiev, A.S. Vezhnevets, M. Yeo, A. Makhzani, H. K\u00fcttler, J. Agapiou, J. Schrittwieser, et\u00a0al., Starcraft ii: a new challenge for reinforcement learning, arXiv:\/1708.04782 (2017)."},{"issue":"7540","key":"10.1016\/j.neucom.2019.06.110_bib0037","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"Volodymyr","year":"2015","journal-title":"Nature"},{"year":"2017","series-title":"Proximal policy optimization algorithms","author":"Schulman","key":"10.1016\/j.neucom.2019.06.110_bib0038"},{"key":"10.1016\/j.neucom.2019.06.110_bib0039","first-page":"1889","article-title":"Trust region policy optimization","author":"Schulman","year":"2015","journal-title":"Comput. Sci."},{"year":"2016","series-title":"Reinforcement learning with unsupervised auxiliary tasks","author":"Jaderberg","key":"10.1016\/j.neucom.2019.06.110_bib0040"},{"key":"10.1016\/j.neucom.2019.06.110_bib0041","article-title":"Hierarchical critics assignment for multi-agent reinforcement learning","author":"Cao","year":"2019","journal-title":"CoRR"},{"issue":"1","key":"10.1016\/j.neucom.2019.06.110_bib0042","doi-asserted-by":"crossref","first-page":"253","DOI":"10.1613\/jair.3912","article-title":"The arcade learning environment: an evaluation platform for general agents","volume":"47","author":"Bellemare","year":"2013","journal-title":"J. Artif. Intell. Res."},{"year":"2017","series-title":"Rainbow: Combining improvements in deep reinforcement learning","author":"Hessel","key":"10.1016\/j.neucom.2019.06.110_bib0043"},{"key":"10.1016\/j.neucom.2019.06.110_bib0044","unstructured":"D.P. Kingma, J. Ba, Adam: a method for stochastic optimization, arXiv:\/1412.6980 (2014)."},{"key":"10.1016\/j.neucom.2019.06.110_bib0045","series-title":"Dueling network architectures for deep reinforcement learning","first-page":"1995","author":"Wang","year":"2015"},{"key":"10.1016\/j.neucom.2019.06.110_bib0046","unstructured":"Dario, Jack, Faulty reward functions in the wild, (https:\/\/blog.openai.com\/faulty-reward-functions)."},{"key":"10.1016\/j.neucom.2019.06.110_bib0047","series-title":"2014 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)","first-page":"76","article-title":"Applying the roofline model","author":"Ofenbeck","year":"2014"}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231220303337?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231220303337?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2020,8,24]],"date-time":"2020-08-24T23:00:47Z","timestamp":1598310047000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231220303337"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9]]},"references-count":47,"alternative-id":["S0925231220303337"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2019.06.110","relation":{},"ISSN":["0925-2312"],"issn-type":[{"type":"print","value":"0925-2312"}],"subject":[],"published":{"date-parts":[[2020,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Accelerating deep reinforcement learning model for game strategy","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2019.06.110","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2020 Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}]}}