{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,8,3]],"date-time":"2024-08-03T05:57:50Z","timestamp":1722664670479},"reference-count":98,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2013,6,1]],"date-time":"2013-06-01T00:00:00Z","timestamp":1370044800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2013,6]]},"DOI":"10.1109\/tnnls.2013.2247418","type":"journal-article","created":{"date-parts":[[2013,4,5]],"date-time":"2013-04-05T20:29:12Z","timestamp":1365193752000},"page":"845-867","source":"Crossref","is-referenced-by-count":48,"title":["Algorithmic Survey of Parametric Value Function Approximation"],"prefix":"10.1109","volume":"24","author":[{"given":"M.","family":"Geist","sequence":"first","affiliation":[]},{"given":"O.","family":"Pietquin","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","author":"van der merwe","year":"2004","journal-title":"Sigma-point Kalman filters for probabilistic inference in dynamic state-space models"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/S0005-1098(00)00089-3"},{"key":"ref33","author":"anderson","year":"1984","journal-title":"An Introduction to Multivariate Statistical Analysis"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1023\/A:1023673105317"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-007-5038-2"},{"key":"ref30","first-page":"664","article-title":"An analysis of reinforcement learning with function approximation","author":"melo","year":"2009","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2002.1025369"},{"key":"ref36","first-page":"182","article-title":"A new extension of the Kalman filter to nonlinear systems","author":"julier","year":"1997","journal-title":"Proc 3rd Int Symp Aerosp Defense Sens Simul Controls"},{"key":"ref35","first-page":"759","article-title":"Eligibility traces for off-policy policy evaluation","author":"precup","year":"2000","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/1102351.1102377"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2009.4927542"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/203330.203343"},{"key":"ref29","author":"yu","year":"2010","journal-title":"Least squares temporal difference methods An analysis under general conditions"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50040-2"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/BF00115009"},{"key":"ref21","first-page":"503","article-title":"Tree-based batch mode reinforcement learning","volume":"6","author":"ernst","year":"2005","journal-title":"J Mach Learn Res"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref23","author":"rummery","year":"1994","journal-title":"On-line Q-learning using connectionist systems"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/9.580874"},{"key":"ref25","first-page":"154","article-title":"Bayes meets Bellman: The Gaussian process approach to temporal difference learning","author":"engel","year":"2003","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1147\/rd.33.0210"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1137\/040614384"},{"key":"ref59","article-title":"The smoothed approximate linear program","author":"desai","year":"2009","journal-title":"Advances in neural information processing systems"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1287\/opre.51.6.850.24925"},{"key":"ref57","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/ADPRL.2009.4927518","article-title":"Projected equations, variational inequalities, and temporal difference methods","author":"bertsekas","year":"2009","journal-title":"Proc IEEE Int Symp Adapt Dynamic Program Reinforce Learn"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1016\/j.cam.2008.07.037"},{"key":"ref55","first-page":"231","author":"bertsekas","year":"2004","journal-title":"Learning and Approximate Dynamic Programming"},{"key":"ref54","author":"bertsekas","year":"1996","journal-title":"Temporal Differences-Based Policy Iteration and Applications in Neuro-Dynamic Programming"},{"key":"ref53","first-page":"317","article-title":"Neural fitted q iteration—First experiences with a data efficient neural reinforcement learning method","author":"riedmiller","year":"2005","journal-title":"Proc Eur Conf Mach Learn"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017928328829"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICUMT.2010.5676597"},{"key":"ref4","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-031-01551-9","author":"szepesvri","year":"2010","journal-title":"Algorithms for Reinforcement Learning"},{"key":"ref3","author":"sigaud","year":"2010","journal-title":"Markov Decision Processes and Artificial Intelligence"},{"key":"ref6","doi-asserted-by":"crossref","DOI":"10.1201\/9781439821091","author":"busoniu","year":"2010","journal-title":"Reinforcement Learning and Dynamic Programming Using Function Approximators"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/s10626-006-8134-8"},{"key":"ref49","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316726","author":"ripley","year":"1987","journal-title":"Stochastic Simulation"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1002\/9780470182963"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-009-5128-4"},{"key":"ref45","first-page":"356","article-title":"Incremental least-squares temporal difference learning","author":"geramifard","year":"2006","journal-title":"Proc Nat Conf Artif Intell"},{"key":"ref48","author":"sutton","year":"2008","journal-title":"Advances in neural information processing systems"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP.2010.5589236"},{"key":"ref42","first-page":"1301","article-title":"Uncertainty management for on-line optimisation of a POMDP-based large-scale spoken dialogue system","author":"daubigney","year":"2011","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref41","first-page":"157","article-title":"Managing uncertainty within the KTD framework","volume":"15","author":"geist","year":"2011","journal-title":"J Mach Learn Res (W&C Proc )"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/BF01211647"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1162\/jmlr.2003.4.6.1107"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017936530646"},{"key":"ref72","first-page":"1878","article-title":"Sample efficient on-line learning of optimal dialogue policies with Kalman temporal differences","author":"pietquin","year":"2011","journal-title":"Proc Int Joint Conf Artif Intell"},{"key":"ref71","first-page":"1","article-title":"Finite-sample analysis of LSTD","author":"lazaric","year":"2010","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref70","first-page":"1","article-title":"Should one compute the temporal difference fix point or minimize the Bellman residual? The unified oblique projection view","author":"scherrer","year":"2010","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pcbi.1002055"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1145\/1966407.1966412"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6289040"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273591"},{"key":"ref78","first-page":"142","article-title":"Bias-variance error bounds for temporal difference updates","author":"kearns","year":"2000","journal-title":"Proc Conf Learn Theory"},{"key":"ref79","author":"scherrer","year":"2012","journal-title":"Recursive least-squares off-policy learning with eligibility traces"},{"key":"ref60","first-page":"1","article-title":"Approximate modified policy iteration","author":"scherrer","year":"2012","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref62","doi-asserted-by":"crossref","first-page":"834","DOI":"10.1109\/TSMC.1983.6313077","article-title":"Neuronlike adaptive elements that can solve difficult learning control problems","volume":"13","author":"barto","year":"1983","journal-title":"IEEE Trans Syst Man Cybern"},{"key":"ref61","first-page":"267","article-title":"Approximately optimal approximate reinforcement learning","author":"kakade","year":"2002","journal-title":"Proc 19th Int Conf Mach Learn"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012901385691"},{"key":"ref64","first-page":"1057","author":"sutton","year":"1999","journal-title":"Neural Information Processing Systems"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2007.11.026"},{"key":"ref66","author":"bhatnagar","year":"2007","journal-title":"Advances in neural information processing systems"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-16292-3_21"},{"key":"ref68","first-page":"11","article-title":"Recursive least-squares learning with eligibility traces","author":"scherrer","year":"2011","journal-title":"Proc Eur Workshop Reinforce Learn"},{"key":"ref2","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref69","first-page":"560","article-title":"Error bounds for approximate policy iteration","author":"munos","year":"2003","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref1","author":"bertsekas","year":"1996","journal-title":"Neuro-Dynamic Programming"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/9.24227"},{"key":"ref94","first-page":"2169","article-title":"Proto-value functions: A Laplacian framework for learning representation and control in Markov decision processes","volume":"8","author":"mahadevan","year":"2007","journal-title":"J Mach Learn Res"},{"key":"ref93","doi-asserted-by":"crossref","first-page":"687","DOI":"10.1613\/jair.3021","article-title":"Automatic induction of bellman-error features for probabilistic planning","volume":"38","author":"wu","year":"2010","journal-title":"J Artif Intell Res"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390251"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273589"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143901"},{"key":"ref98","author":"barreto","year":"2011","journal-title":"Advances in neural information processing systems"},{"key":"ref96","first-page":"361","author":"singh","year":"1995","journal-title":"Advances in neural information processing systems"},{"key":"ref97","author":"ma","year":"2010","journal-title":"Convergence analysis of kernel-based on-policy approximate policy iteration algorithms for Markov decision processes with continuous multidimensional states and actions"},{"key":"ref10","author":"engel","year":"2005","journal-title":"Algorithms and Representations for Reinforcement Learning"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"483","DOI":"10.1613\/jair.3077","article-title":"Kalman temporal differences","volume":"39","author":"geist","year":"2010","journal-title":"J Artif Intell Res"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114723"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICUMT.2010.5676598"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553501"},{"key":"ref15","first-page":"1204","author":"maei","year":"2009","journal-title":"Advances in neural information processing systems"},{"key":"ref16","first-page":"1","article-title":"${\\rm GQ}(\\lambda)$<\/tex><\/formula>: A general gradient algorithm for temporal-differences prediction learning with eligibility traces","author":"maei","year":"2010","journal-title":"Proc Conf Artif General Intell"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553504"},{"key":"ref17","first-page":"1","article-title":"Toward off-policy learning control with function approximation","author":"maei","year":"2010","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref81","doi-asserted-by":"crossref","first-page":"338","DOI":"10.1109\/ADPRL.2007.368208","article-title":"Kernelizing ${\\rm LSPE}(\\lambda)$<\/tex><\/formula>","author":"jung","year":"2007","journal-title":"Proc IEEE Symp Approx Dynamic Program Reinforce Learn"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022192903948"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553442"},{"key":"ref19","first-page":"1","article-title":"Q-learning algorithms for optimal stopping based on least squares","author":"yu","year":"2007","journal-title":"Proc Eur Control Conf"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2007.368210"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2007.899161"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1007\/s10479-005-5732-z"},{"key":"ref85","first-page":"1009","author":"johns","year":"2010","journal-title":"Advances in neural information processing systems"},{"key":"ref86","first-page":"1","article-title":" $\\ell_{1}$<\/tex><\/formula>-penalized projected Bellman residual","author":"geist","year":"2011","journal-title":"Proc Eur Workshop Reinforce Learn"},{"key":"ref87","first-page":"1","article-title":"Regularized least squares temporal difference learning with nested $\\ell_{2}$<\/tex><\/formula> and $\\ell_{1}$<\/tex><\/formula> penalization","author":"hoffman","year":"2011","journal-title":"Proc Eur Workshop Reinforce Learn"},{"key":"ref88","first-page":"1","article-title":"A Dantzig selector approach to temporal difference learning","author":"geist","year":"2012","journal-title":"Proc Int Conf Mach Learn"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/6494635\/06471847.pdf?arnumber=6471847","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,8]],"date-time":"2024-05-08T11:37:06Z","timestamp":1715168226000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6471847\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,6]]},"references-count":98,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2013.2247418","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013,6]]}}}