{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T20:33:26Z","timestamp":1725914006482},"publisher-location":"Cham","reference-count":17,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319700922"},{"type":"electronic","value":"9783319700939"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-70093-9_50","type":"book-chapter","created":{"date-parts":[[2017,10,23]],"date-time":"2017-10-23T18:48:48Z","timestamp":1508784528000},"page":"475-483","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":20,"title":["Deep Reinforcement Learning: From Q-Learning to Deep Q-Learning"],"prefix":"10.1007","author":[{"given":"Fuxiao","family":"Tan","sequence":"first","affiliation":[]},{"given":"Pengfei","family":"Yan","sequence":"additional","affiliation":[]},{"given":"Xinping","family":"Guan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,10,24]]},"reference":[{"issue":"7","key":"50_CR1","doi-asserted-by":"publisher","first-page":"1527","DOI":"10.1162\/neco.2006.18.7.1527","volume":"18","author":"GE Hinton","year":"2006","unstructured":"Hinton, G.E., Osindero, S., Teh, Y.W.: A fast learning algorithm for deep belief nets. Neural Comput. 18(7), 1527\u20131554 (2006)","journal-title":"Neural Comput."},{"issue":"7553","key":"50_CR2","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436\u2013444 (2015)","journal-title":"Nature"},{"issue":"7587","key":"50_CR3","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., Huang, A., Maddison, C.J., Guez, A., Sifre, L., van den Driessche, G., Schrittwieser, J., Antonoglou, I., Panneershelvam, V., Lanctot, M., Dieleman, S., Grewe, D., Nham, J., Kalchbrenner, N., Sutskever, I., Lillicrap, T., Leach, M., Kavukcuoglu, K., Graepel, T., Hassabis, D.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"issue":"5786","key":"50_CR4","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1126\/science.1127647","volume":"313","author":"GE Hinton","year":"2006","unstructured":"Hinton, G.E., Salakhutdinov, R.R.: Reducing the dimensionality of data with neural networks. Science 313(5786), 504\u2013507 (2006)","journal-title":"Science"},{"issue":"7540","key":"50_CR5","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G., Petersen, S., Beattie, C., Sadik, A., Antonoglou, I., King, H., Kumaran, D., Wierstra, D., Legg, S., Hassabis, D.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"issue":"8","key":"50_CR6","doi-asserted-by":"publisher","first-page":"1798","DOI":"10.1109\/TPAMI.2013.50","volume":"35","author":"Y Bengio","year":"2013","unstructured":"Bengio, Y., Courville, A., Vincent, P.: Representation learning: a review and new perspectives. IEEE Trans. Pattern Anal. Mach. Intell. 35(8), 1798\u20131828 (2013)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"50_CR7","unstructured":"Werbos, P.J.: Approximate dynamic programming for realtime control and neural modeling. In: Handbook of Intelligent Control: Neural, Fuzzy, and Adaptive Approaches. Van Nostrand Reinhold, New York (1992)"},{"volume-title":"Reinforcement Learning: An Introduction","year":"1998","author":"RS Sutton","key":"50_CR8","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"issue":"12","key":"50_CR9","doi-asserted-by":"publisher","first-page":"2834","DOI":"10.1109\/TCYB.2014.2357896","volume":"44","author":"DR Liu","year":"2014","unstructured":"Liu, D.R., Wang, D., Wang, F.Y., Li, H.L., Yang, X.: Neural-network-based Online HJB solution for optimal robust guaranteed cost control of continuous-time uncertain nonlinear systems. IEEE Trans. Cybern. 44(12), 2834\u20132847 (2014)","journal-title":"IEEE Trans. Cybern."},{"key":"50_CR10","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.neunet.2014.09.003","volume":"61","author":"J Schmidhuber","year":"2015","unstructured":"Schmidhuber, J.: Deep learning in neural networks: an overview. Neural Netw. 61, 85\u2013117 (2015)","journal-title":"Neural Netw."},{"issue":"2","key":"50_CR11","doi-asserted-by":"publisher","first-page":"418","DOI":"10.1109\/TNNLS.2013.2280013","volume":"25","author":"DR Liu","year":"2014","unstructured":"Liu, D.R., Wang, D., Li, H.: Decentralized stabilization for a class of continuous-time nonlinear interconnected systems using online learning optimal control approach. IEEE Trans. Neural Netw. Learn. Syst. 25(2), 418\u2013428 (2014)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"issue":"3","key":"50_CR12","doi-asserted-by":"publisher","first-page":"840","DOI":"10.1109\/TCYB.2015.2492242","volume":"46","author":"QL Wei","year":"2015","unstructured":"Wei, Q.L., Liu, D.R., Lin, H.: Value iteration adaptive dynamic programming for optimal control of discrete-time nonlinear systems. IEEE Trans. Cybern. 46(3), 840\u2013853 (2015)","journal-title":"IEEE Trans. Cybern."},{"issue":"3","key":"50_CR13","doi-asserted-by":"publisher","first-page":"621","DOI":"10.1109\/TNNLS.2013.2281663","volume":"25","author":"DR Liu","year":"2014","unstructured":"Liu, D.R., Wei, Q.L.: Policy iteration adaptive dynamic programming algorithm for discrete-time nonlinear systems. IEEE Trans. Neural Netw. Learn. Syst. 25(3), 621\u2013634 (2014)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"50_CR14","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"LP Kaelbling","year":"1996","unstructured":"Kaelbling, L.P., Littman, M.L., Moore, A.W.: Reinforcement learning: a survey. J. Artif. Intell. Res. 4, 237\u2013285 (1996)","journal-title":"J. Artif. Intell. Res."},{"issue":"4","key":"50_CR15","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1109\/MCI.2010.938364","volume":"5","author":"I Arel","year":"2010","unstructured":"Arel, I., Rose, D.C., Karnowski, T.P.: Deep machine learning - a new Frontier in artifical intelligence research. IEEE Comput. Intell. Mag. 5(4), 13\u201318 (2010)","journal-title":"IEEE Comput. Intell. Mag."},{"issue":"3\u20134","key":"50_CR16","first-page":"279","volume":"8","author":"CJH Watkins","year":"1992","unstructured":"Watkins, C.J.H., Dayan, P.: Technical note: Q-learning. Mach. Learn. 8(3\u20134), 279\u2013292 (1992)","journal-title":"Mach. Learn."},{"key":"50_CR17","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., Riedmiller, M.: Playing atari with deep reinforcement learning. In: NIPS Deep Learning Workshop, arxiv preprint arXiv:1312.5602 (2013)"}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-70093-9_50","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T12:25:32Z","timestamp":1710332732000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-70093-9_50"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319700922","9783319700939"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-70093-9_50","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]},"assertion":[{"value":"24 October 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Guangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2017","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 November 2017","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 November 2017","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2017","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.iconip2017.org\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}