乐胖代购免代理版

{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T21:40:29Z","timestamp":1729633229216,"version":"3.28.0"},"reference-count":54,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011,4]]},"DOI":"10.1109\/adprl.2011.5967355","type":"proceedings-article","created":{"date-parts":[[2011,8,4]],"date-time":"2011-08-04T01:40:00Z","timestamp":1312422000000},"page":"9-16","source":"Crossref","is-referenced-by-count":17,"title":["Parametric value function approximation: A unified view"],"prefix":"10.1109","author":[{"given":"Matthieu","family":"Geist","sequence":"first","affiliation":[]},{"given":"Olivier","family":"Pietquin","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1147\/rd.33.0210"},{"key":"ref38","article-title":"Toward Off-Policy Learning Control with Function Approximation","author":"maei","year":"2010","journal-title":"27th International Conference on Machine Learning (ICML 2010)"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICUMT.2010.5676598"},{"key":"ref32","first-page":"1207","article-title":"Convergence of Least Squares Temporal Difference Methods Under General Conditions","author":"yu","year":"2010","journal-title":"International Conference on Machine Learning (ICML 2010)"},{"key":"ref31","doi-asserted-by":"crossref","first-page":"233","DOI":"10.1023\/A:1017936530646","article-title":"Technical Update: Least-Squares Temporal Difference Learning","volume":"49","author":"boyan","year":"1999","journal-title":"Machine Learning"},{"key":"ref30","first-page":"1107","article-title":"Least-squares policy iteration","volume":"4","author":"lagoudakis","year":"2003","journal-title":"Journal of Machine Learning Research"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.2991\/agi.2010.22"},{"key":"ref36","first-page":"1204","article-title":"Convergent temporal-difference learning with arbitrary smooth function approximation","volume":"22","author":"maei","year":"2009","journal-title":"Advances in neural information processing systems"},{"key":"ref35","article-title":"Incremental natural actor-critic algorithms","author":"bhatnagar","year":"2007","journal-title":"Conference on Neural Information Processing Systems (NIPS)"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553501"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114723"},{"key":"ref27","article-title":"Managing Uncertainty within Value Function Approximation in Reinforcement Learning","author":"geist","year":"2010","journal-title":"Active Learning and Experimental Design workshop (collocated with AISTATS 2010)"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/BF01211647"},{"journal-title":"Reinforcement Learning An Introduction","year":"1998","author":"sutton","key":"ref2"},{"journal-title":"Neuro-Dynamic Programming","year":"1996","author":"bertsekas","key":"ref1"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2009.4927543"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICUMT.2010.5676597"},{"key":"ref21","doi-asserted-by":"crossref","DOI":"10.1613\/jair.3077","article-title":"Kalman Temporal Differences","author":"geist","year":"2010","journal-title":"Journal of Artificial Intelligence Research (JAIR)"},{"key":"ref24","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-642-10677-4_57","article-title":"Tracking in Reinforcement Learning","author":"geist","year":"2009","journal-title":"Proceedings of the 16th International Conference on Neural Information Processing (ICONIP 2009)"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273591"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP.2010.5589236"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1115\/1.3662552"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2007.899161"},{"key":"ref51","doi-asserted-by":"crossref","first-page":"338","DOI":"10.1109\/ADPRL.2007.368208","article-title":"Kernelizing LSPE(?)","author":"jung","year":"2007","journal-title":"IEEE Symposium on Approximate Dynamic Programming and Reinforcement Learning"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553442"},{"key":"ref53","article-title":"Regularized policy iteration","author":"farahmand","year":"2008","journal-title":"22nd Annual Conference on Neural Information Processing Systems (NIPS 21)"},{"key":"ref52","first-page":"356","article-title":"Incremental Least-Squares Temporal Difference Learning","author":"geramifard","year":"2006","journal-title":"21st Conference of American Association for Artificial Intelligence (AAAI 06)"},{"key":"ref10","first-page":"664","article-title":"An analysis of reinforcement learning with function approximation","author":"melo","year":"2009","journal-title":"Proceedings of the 25th International Conference on Machine Learning"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-007-5038-2"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50040-2"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"key":"ref13","first-page":"154","article-title":"Bayes Meets Bellman: The Gaussian Process Approach to Temporal Difference Learning","author":"engel","year":"2003","journal-title":"Proc of the International Conference on Machine Learning (ICML)"},{"journal-title":"Gaussian Processes for Machine Learning","year":"2006","author":"rassmussen","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2004.830985"},{"article-title":"Algorithms and Representations for Reinforcement Learning","year":"2005","author":"engel","key":"ref16"},{"key":"ref17","article-title":"Bayesian Filtering: From Kalman Filters to Particle Filters, and Beyond","author":"chen","year":"2003","journal-title":"Tech Rep"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/1102351.1102377"},{"key":"ref19","first-page":"759","article-title":"Eligibility Traces for Off-Policy Policy Evaluation","author":"precup","year":"2000","journal-title":"Proceedings of the Seventeenth International Conference on Machine Learning (ICML 00)"},{"key":"ref4","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","author":"puterman","year":"1994","journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming"},{"journal-title":"Markov Decision Processes and Artificial Intelligence","year":"2010","author":"sigaud","key":"ref3"},{"key":"ref6","article-title":"A Brief Survey of Parametric Value Function Approximation","author":"geist","year":"2010","journal-title":"Supélec Tech Rep"},{"journal-title":"Dynamic Programming","year":"1957","author":"bellman","key":"ref5"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/9.580874"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/s10626-006-8134-8"},{"key":"ref49","article-title":"Q-Learning Algorithms for Optimal Stopping Based on Least Squares","author":"yu","year":"2007","journal-title":"Proceedings of European Control Conference"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/203330.203343"},{"key":"ref46","first-page":"231","article-title":"Improved Temporal Difference Methods with Linear Function Approximation","author":"bertsekas","year":"2004","journal-title":"Learning and Approximate Dynamic Programming"},{"key":"ref45","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1023\/A:1022192903948","article-title":"Least Squares Policy Evaluation Algorithms with Linear Function Approximation","volume":"13","author":"nedi?","year":"2003","journal-title":"Discrete Event Dynamic Systems Theory and Applications"},{"key":"ref48","article-title":"Projected Equations, Variational Inequalities, and Temporal Difference Methods","author":"bertsekas","year":"2009","journal-title":"Proc IEEE Int Symp Adaptive Dyn Program Reinforcement Learn"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1016\/j.cam.2008.07.037"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017928328829"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1137\/040614384"},{"key":"ref44","first-page":"503","article-title":"Tree-Based Batch Mode Reinforcement Learning","volume":"6","author":"ernst","year":"2005","journal-title":"Journal of Machine Learning Research"},{"key":"ref43","article-title":"Neural Fitted Q Iteration - First Experiences with a Data Efficient Neural Reinforcement Learning Method","author":"riedmiller","year":"2005","journal-title":"Euro Conf on Machine Learning (ECML)"}],"event":{"name":"2011 Ieee Symposium On Adaptive Dynamic Programming And Reinforcement Learning","start":{"date-parts":[[2011,4,11]]},"location":"Paris, France","end":{"date-parts":[[2011,4,15]]}},"container-title":["2011 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/5958170\/5967347\/05967355.pdf?arnumber=5967355","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,11,29]],"date-time":"2021-11-29T15:10:41Z","timestamp":1638198641000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/5967355\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,4]]},"references-count":54,"URL":"https:\/\/doi.org\/10.1109\/adprl.2011.5967355","relation":{},"subject":[],"published":{"date-parts":[[2011,4]]}}}