{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T18:27:29Z","timestamp":1729621649125,"version":"3.28.0"},"reference-count":25,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011,12]]},"DOI":"10.1109\/cdc.2011.6160851","type":"proceedings-article","created":{"date-parts":[[2012,3,7]],"date-time":"2012-03-07T16:18:31Z","timestamp":1331137111000},"page":"148-155","source":"Crossref","is-referenced-by-count":2,"title":["TD-learning with exploration"],"prefix":"10.1109","author":[{"given":"Sean P.","family":"Meyn","sequence":"first","affiliation":[]},{"given":"Amit","family":"Surana","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"19","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2011.5991485"},{"key":"17","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511526237"},{"key":"18","article-title":"On-line Q-learning using connectionist systems","author":"rummery","year":"1994","journal-title":"Technical Report 166 Cambridge Univ Dept Eng"},{"key":"15","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2010.5717192"},{"journal-title":"Approximate and Data-driven Dynamic Programming for Queueing Networks","year":"2006","author":"moallemi","key":"16"},{"key":"13","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511804410"},{"key":"14","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-3267-7"},{"key":"11","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1007\/s10479-005-5732-z","article-title":"Basis function adaptation in temporal difference reinforcement learning","volume":"134","author":"mannor","year":"2005","journal-title":"Annals of Oper Res"},{"key":"12","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2009.5399753"},{"key":"21","doi-asserted-by":"publisher","DOI":"10.2200\/S00268ED1V01Y201005AIM009"},{"key":"20","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007678930559"},{"key":"22","doi-asserted-by":"publisher","DOI":"10.1109\/9.580874"},{"journal-title":"Learning from delayed rewards","year":"1989","author":"watkins","key":"23"},{"key":"24","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"25","article-title":"Q-learning algorithms for optimal stopping based on least squares","author":"yu","year":"2007","journal-title":"Proc European Control Conference (ECC)"},{"key":"3","doi-asserted-by":"crossref","DOI":"10.1007\/978-93-86279-38-5","author":"borkar","year":"2008","journal-title":"Stochastic Approximation A Dynamical Systems Viewpoint"},{"journal-title":"Neuro-Dynamic Programming","year":"1996","author":"bertsekas","key":"2"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1214\/EJP.v10-231"},{"key":"1","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2010.5717930"},{"key":"7","article-title":"Feature selection for neuro-dynamic programming","author":"huang","year":"2011","journal-title":"Reinforcement Learning and Approximate Dynamic Programming for Feedback Control"},{"key":"6","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2011.110922"},{"key":"5","doi-asserted-by":"publisher","DOI":"10.1287\/opre.49.5.720.10605"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2009.5399685"},{"key":"9","doi-asserted-by":"publisher","DOI":"10.1214\/aoap\/1042765670"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2007.904450"}],"event":{"name":"2011 50th IEEE Conference on Decision and Control and European Control Conference (CDC-ECC 2011)","start":{"date-parts":[[2011,12,12]]},"location":"Orlando, FL, USA","end":{"date-parts":[[2011,12,15]]}},"container-title":["IEEE Conference on Decision and Control and European Control Conference"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/6149620\/6159299\/06160851.pdf?arnumber=6160851","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,24]],"date-time":"2019-06-24T21:56:03Z","timestamp":1561413363000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6160851\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,12]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/cdc.2011.6160851","relation":{},"subject":[],"published":{"date-parts":[[2011,12]]}}}