{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T04:53:37Z","timestamp":1725684817263},"publisher-location":"Berlin, Heidelberg","reference-count":27,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642299452"},{"type":"electronic","value":"9783642299469"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-29946-9_11","type":"book-chapter","created":{"date-parts":[[2012,5,18]],"date-time":"2012-05-18T17:01:49Z","timestamp":1337360509000},"page":"78-88","source":"Crossref","is-referenced-by-count":4,"title":["Reinforcement Learning with a Bilinear Q Function"],"prefix":"10.1007","author":[{"given":"Charles","family":"Elkan","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"11_CR1","unstructured":"Chakraborty, B., Strecher, V., Murphy, S.: Bias correction and confidence intervals for fitted Q-iteration. In: NIPS Workshop on Model Uncertainty and Risk in Reinforcement Learning (2008)"},{"key":"11_CR2","first-page":"1471","volume":"11","author":"Y.W. Chang","year":"2010","unstructured":"Chang, Y.W., Hsieh, C.J., Chang, K.W., Ringgaard, M., Lin, C.J.: Training and testing low-degree polynomial data mappings via linear SVM. Journal of Machine Learning Research\u00a011, 1471\u20131490 (2010)","journal-title":"Journal of Machine Learning Research"},{"issue":"6","key":"11_CR3","doi-asserted-by":"publisher","first-page":"850","DOI":"10.1287\/opre.51.6.850.24925","volume":"51","author":"D.P. Farias De","year":"2003","unstructured":"De Farias, D.P., Van Roy, B.: The linear programming approach to approximate dynamic programming. Operations Research\u00a051(6), 850\u2013865 (2003)","journal-title":"Operations Research"},{"key":"11_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-642-05224-8_1","volume-title":"Advances in Machine Learning","author":"T.G. Dietterich","year":"2009","unstructured":"Dietterich, T.G.: Machine Learning and Ecosystem Informatics: Challenges and Opportunities. In: Zhou, Z.-H., Washio, T. (eds.) ACML 2009. LNCS, vol.\u00a05828, pp. 1\u20135. Springer, Heidelberg (2009)"},{"issue":"1","key":"11_CR5","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1023\/A:1007694015589","volume":"43","author":"S. D\u017eeroski","year":"2001","unstructured":"D\u017eeroski, S., De Raedt, L., Driessens, K.: Relational reinforcement learning. Machine Learning\u00a043(1), 7\u201352 (2001)","journal-title":"Machine Learning"},{"issue":"1","key":"11_CR6","first-page":"503","volume":"6","author":"D. Ernst","year":"2005","unstructured":"Ernst, D., Geurts, P., Wehenkel, L.: Tree-based batch mode reinforcement learning. Journal of Machine Learning Research\u00a06(1), 503\u2013556 (2005)","journal-title":"Journal of Machine Learning Research"},{"key":"11_CR7","unstructured":"Gordon, G.J.: Stable fitted reinforcement learning. In: Advances in Neural Information Processing Systems (NIPS), pp. 1052\u20131058 (1995a)"},{"key":"11_CR8","doi-asserted-by":"crossref","unstructured":"Gordon, G.J.: Stable function approximation in dynamic programming. In: Proceedings of the International Conference on Machine Learning (ICML), pp. 261\u2013268 (1995b)","DOI":"10.1016\/B978-1-55860-377-6.50040-2"},{"key":"11_CR9","unstructured":"Hannah, L.A., Dunson, D.B.: Approximate dynamic programming for storage problems. In: Proceedings of the International Conference on Machine Learning, ICML (2011)"},{"key":"11_CR10","unstructured":"Judd, K.L., Solnick, A.J.: Numerical dynamic programming with shape-preserving splines. Unpublished paper from the Hoover Institution (1994), \n \n http:\/\/bucky.stanford.edu\/papers\/dpshape.pdf"},{"key":"11_CR11","first-page":"1107","volume":"4","author":"M.G. Lagoudakis","year":"2003","unstructured":"Lagoudakis, M.G., Parr, R.: Least-squares policy iteration. Journal of Machine Learning Research\u00a04, 1107\u20131149 (2003)","journal-title":"Journal of Machine Learning Research"},{"key":"11_CR12","unstructured":"Lazaric, A., Restelli, M., Bonarini, A.: Reinforcement learning in continuous action spaces through sequential Monte Carlo methods. In: Advances in Neural Information Processing Systems 20 (NIPS). MIT Press (2007)"},{"key":"11_CR13","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1007\/978-3-540-87481-2_5","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"F.S. Melo","year":"2008","unstructured":"Melo, F.S., Lopes, M.: Fitted Natural Actor-Critic: A New Algorithm for Continuous State-Action MDPs. In: Daelemans, W., Goethals, B., Morik, K. (eds.) ECML PKDD 2008, Part II. LNCS (LNAI), vol.\u00a05212, pp. 66\u201381. Springer, Heidelberg (2008)"},{"key":"11_CR14","first-page":"1073","volume":"6","author":"S.A. Murphy","year":"2005","unstructured":"Murphy, S.A.: A generalization error for Q-learning. Journal of Machine Learning Research\u00a06, 1073\u20131097 (2005)","journal-title":"Journal of Machine Learning Research"},{"issue":"1","key":"11_CR15","first-page":"15","volume":"27","author":"G. Neumann","year":"2008","unstructured":"Neumann, G.: Batch-mode reinforcement learning for continuous state spaces: A survey. \u00d6GAI Journal\u00a027(1), 15\u201323 (2008)","journal-title":"\u00d6GAI Journal"},{"key":"11_CR16","doi-asserted-by":"crossref","unstructured":"Pazis, J., Lagoudakis, M.G.: Binary action search for learning continuous-action control policies. In: Proceedings of the 26th Annual International Conference on Machine Learning (ICML), pp. 100\u2013107 (2009)","DOI":"10.1145\/1553374.1553476"},{"key":"11_CR17","unstructured":"Pazis, J., Parr, R.: Generalized value functions for large action sets. In: Proceedings of the International Conference on Machine Learning, ICML (2011)"},{"key":"11_CR18","doi-asserted-by":"crossref","unstructured":"Powell, W.B.: Approximate Dynamic Programming. John Wiley & Sons, Inc. (2007)","DOI":"10.1002\/9780470182963"},{"key":"11_CR19","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1007\/11564096_32","volume-title":"Machine Learning: ECML 2005","author":"M. Riedmiller","year":"2005","unstructured":"Riedmiller, M.: Neural Fitted Q Iteration - First Experiences with a Data Efficient Neural Reinforcement Learning Method. In: Gama, J., Camacho, R., Brazdil, P.B., Jorge, A.M., Torgo, L. (eds.) ECML 2005. LNCS (LNAI), vol.\u00a03720, pp. 317\u2013328. Springer, Heidelberg (2005)"},{"issue":"5","key":"11_CR20","doi-asserted-by":"publisher","first-page":"683","DOI":"10.1287\/mnsc.1050.0504","volume":"52","author":"D.I. Simester","year":"2006","unstructured":"Simester, D.I., Sun, P., Tsitsiklis, J.N.: Dynamic catalog mailing policies. Management Science\u00a052(5), 683\u2013696 (2006)","journal-title":"Management Science"},{"key":"11_CR21","unstructured":"Smart, W.D., Kaelbling, L.P.: Practical reinforcement learning in continuous spaces. In: Proceedings of the 17th International Conference on Machine Learning (ICML), pp. 903\u2013910 (2000)"},{"issue":"2","key":"11_CR22","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1007\/s10614-007-9111-5","volume":"31","author":"J. Stachurski","year":"2008","unstructured":"Stachurski, J.: Continuous state dynamic programming via nonexpansive approximation. Computational Economics\u00a031(2), 141\u2013160 (2008)","journal-title":"Computational Economics"},{"key":"11_CR23","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: An introduction. MIT Press (1998)","DOI":"10.1109\/TNN.1998.712192"},{"key":"11_CR24","doi-asserted-by":"crossref","unstructured":"Todorov, E.: Efficient computation of optimal actions. In: Proceedings of the National Academy of Sciences\u00a0106(28), 11478\u201311483 (2009)","DOI":"10.1073\/pnas.0710743106"},{"key":"11_CR25","unstructured":"van Hasselt, H.P.: Double Q-learning. In: Advances in Neural Information Processing Systems (NIPS), vol.\u00a023 (2010)"},{"key":"11_CR26","first-page":"32","volume":"21","author":"P. Viviani","year":"1995","unstructured":"Viviani, P., Flash, T.: Minimum-jerk, two-thirds power law, and isochrony: converging approaches to movement planning. Journal of Experimental Psychology\u00a021, 32\u201353 (1995)","journal-title":"Journal of Experimental Psychology"},{"key":"11_CR27","unstructured":"Yu, V.: Approximate dynamic programming for blood inventory management. Honors thesis, Princeton University (2007)"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-29946-9_11.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,5,4]],"date-time":"2021-05-04T11:22:20Z","timestamp":1620127340000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-29946-9_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642299452","9783642299469"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-29946-9_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}