{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,23]],"date-time":"2024-10-23T08:01:08Z","timestamp":1729670468606,"version":"3.28.0"},"reference-count":33,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,7]]},"DOI":"10.1109\/ijcnn.2016.7727678","type":"proceedings-article","created":{"date-parts":[[2016,11,8]],"date-time":"2016-11-08T16:15:56Z","timestamp":1478621756000},"page":"3717-3722","source":"Crossref","is-referenced-by-count":0,"title":["Manifold regularization based approximate value iteration for learning control"],"prefix":"10.1109","author":[{"given":"Hongliang","family":"Li","sequence":"first","affiliation":[]},{"given":"Miao","family":"He","sequence":"additional","affiliation":[]},{"given":"Xunan","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Changrui","family":"Ren","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","first-page":"1107","article-title":"Least-squares policy iteration","volume":"4","author":"lagoudakis","year":"2003","journal-title":"Journal of Machine Learning Research"},{"key":"ref32","first-page":"568","article-title":"Error propagation for approximate policy and value iteration","author":"farahmand","year":"2010","journal-title":"Proc Neural Info Process Syst"},{"key":"ref31","first-page":"815","article-title":"Finite-time bounds for fitted value iteration","volume":"9","author":"munos","year":"2008","journal-title":"Journal of Machine Learning Research"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2014.2307349"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1049\/iet-cta.2011.0783"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2013.2280013"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2014.2357896"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2013.2296206"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2014.05.050"},{"key":"ref15","doi-asserted-by":"crossref","DOI":"10.1201\/9781439821091","author":"busoniu","year":"2010","journal-title":"Reinforcement Learning and Dynamic Programming Using Function Approximators (Automation and Control Engineering)"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_2"},{"journal-title":"Approximate solutions to Markov decision processes","year":"1999","author":"gordon","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017928328829"},{"key":"ref19","first-page":"503","article-title":"Tree-based batch mode reinforcement learning","volume":"6","author":"ernst","year":"2005","journal-title":"Journal of Machine Learning Research"},{"key":"ref28","first-page":"1229","article-title":"Manifold regularization and semi-supervised learning: Some theoretical analyses","volume":"14","author":"niyogi","year":"2013","journal-title":"Journal of Machine Learning Research"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1002\/9780470182963"},{"key":"ref27","first-page":"2399","article-title":"Manifold regularization: A geo-metric framework for learning from labeled and unlabeled examples","volume":"7","author":"belkin","year":"2006","journal-title":"Journal of Machine Learning Research"},{"journal-title":"Neuro-Dynamic Programming","year":"1996","author":"bertsekas","key":"ref3"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/MCI.2009.932261"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2015.7280311"},{"journal-title":"Reinforcement Learning and Approximate Dynamic Programming for Feedback Control","year":"2013","author":"lewis","key":"ref5"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2013.2295351"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2015.2402203"},{"journal-title":"Dynamic Programming","year":"1957","author":"bellman","key":"ref2"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2012.11.021"},{"journal-title":"Reinforcement Learning An Introduction","year":"1998","author":"sutton","key":"ref1"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/11564096_32"},{"key":"ref22","first-page":"1","article-title":"Fitted Q-iteration in contin-uous action-space MDPs","author":"antos","year":"2007","journal-title":"Proc Neural Info Process Syst"},{"key":"ref21","first-page":"725","article-title":"Regularized fitted Q-iteration for planning in continuous-space Marko-vian Decision Problems","author":"farahmand","year":"2009","journal-title":"Proc Amer Control Conf e"},{"key":"ref24","first-page":"2169","article-title":"Proto-value functions: a Laplacian framework for learning representation and control in Markov decision processes","volume":"8","author":"mahadevan","year":"2007","journal-title":"Journal of Machine Learning Research"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2013.2247418"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2014.07.008"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1561\/2200000003"}],"event":{"name":"2016 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2016,7,24]]},"location":"Vancouver, BC, Canada","end":{"date-parts":[[2016,7,29]]}},"container-title":["2016 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7593175\/7726591\/07727678.pdf?arnumber=7727678","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,24]],"date-time":"2017-06-24T23:07:17Z","timestamp":1498345637000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7727678\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,7]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/ijcnn.2016.7727678","relation":{},"subject":[],"published":{"date-parts":[[2016,7]]}}}