{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T04:53:47Z","timestamp":1725684827204},"publisher-location":"Berlin, Heidelberg","reference-count":31,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642299452"},{"type":"electronic","value":"9783642299469"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-29946-9_19","type":"book-chapter","created":{"date-parts":[[2012,5,18]],"date-time":"2012-05-18T17:01:49Z","timestamp":1337360509000},"page":"177-188","source":"Crossref","is-referenced-by-count":2,"title":["Robust Bayesian Reinforcement Learning through Tight Lower Bounds"],"prefix":"10.1007","author":[{"given":"Christos","family":"Dimitrakakis","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"19_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Ng, A.Y.: Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the 21st International Conference on Machine Learning (ICML 2004) (2004)","DOI":"10.1145\/1015330.1015430"},{"key":"19_CR2","unstructured":"Asmuth, J., Li, L., Littman, M.L., Nouri, A., Wingate, D.: A Bayesian sampling approach to exploration in reinforcement learning. In: UAI 2009 (2009)"},{"key":"19_CR3","unstructured":"Auer, P., Jaksch, T., Ortner, R.: Near-optimal regret bounds for reinforcement learning. In: Proceedings of NIPS 2008 (2008)"},{"key":"19_CR4","first-page":"213","volume":"3","author":"R.I. Brafman","year":"2003","unstructured":"Brafman, R.I., Tennenholtz, M.: R-max-a general polynomial time algorithm for near-optimal reinforcement learning. 
The Journal of Machine Learning Research\u00a03, 213\u2013231 (2003)","journal-title":"The Journal of Machine Learning Research"},{"issue":"4","key":"19_CR5","doi-asserted-by":"publisher","first-page":"785","DOI":"10.1287\/opre.1090.0796","volume":"58","author":"D.B. Brown","year":"2010","unstructured":"Brown, D.B., Smith, J.E., Sun, P.: Information relaxations and duality in stochastic dynamic programs. Operations Research\u00a058(4), 785\u2013801 (2010)","journal-title":"Operations Research"},{"key":"19_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"200","DOI":"10.1007\/978-3-642-15880-3_19","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"P.S. Castro","year":"2010","unstructured":"Castro, P.S., Precup, D.: Smarter Sampling in Model-Based Bayesian Reinforcement Learning. In: Balc\u00e1zar, J.L., Bonchi, F., Gionis, A., Sebag, M. (eds.) ECML PKDD 2010. LNCS, vol.\u00a06321, pp. 200\u2013214. Springer, Heidelberg (2010)"},{"issue":"6","key":"19_CR7","doi-asserted-by":"publisher","first-page":"850","DOI":"10.1287\/opre.51.6.850.24925","volume":"51","author":"D.P. Farias de","year":"2003","unstructured":"de Farias, D.P., Van Roy, B.: The linear programming approach to approximate dynamic programming. Operations Research\u00a051(6), 850\u2013865 (2003)","journal-title":"Operations Research"},{"issue":"3","key":"19_CR8","doi-asserted-by":"publisher","first-page":"462","DOI":"10.1287\/moor.1040.0094","volume":"293","author":"D.P. Farias de","year":"2004","unstructured":"de Farias, D.P., Van Roy, B.: On constraint sampling in the linear programming approach to approximate dynamic programming. Mathematics of Operations Research\u00a0293(3), 462\u2013478 (2004)","journal-title":"Mathematics of Operations Research"},{"key":"19_CR9","unstructured":"Dearden, R., Friedman, N., Russell, S.J.: Bayesian Q-learning. In: AAAI\/IAAI, pp. 
761\u2013768 (1998)"},{"key":"19_CR10","first-page":"150","volume-title":"Proceedings of the 15th Conference on Uncertainty in Artificial Intelligence (UAI 1999)","author":"R. Dearden","year":"1999","unstructured":"Dearden, R., Friedman, N., Andre, D.: Model based Bayesian exploration. In: Laskey, K.B., Prade, H. (eds.) Proceedings of the 15th Conference on Uncertainty in Artificial Intelligence (UAI 1999), July \u00a030-August\u00a01, pp. 150\u2013159. Morgan Kaufmann, San Francisco (1999)"},{"key":"19_CR11","unstructured":"DeGroot, M.H.: Optimal Statistical Decisions. John Wiley & Sons (1970)"},{"key":"19_CR12","unstructured":"Dimitrakakis, C.: Complexity of stochastic branch and bound methods for belief tree search in Bayesian reinforcement learning. In: 2nd International Conference on Agents and Artificial Intelligence (ICAART 2010), Valencia, Spain, pp. 259\u2013264. ISNTICC, Springer (2009)"},{"key":"19_CR13","doi-asserted-by":"crossref","unstructured":"Dimitrakakis, C., Rothkopf, C.A.: Bayesian multitask inverse reinforcement learning. In: European Workshop on Reinforcement Learning, EWRL 2011 (2011)","DOI":"10.1007\/978-3-642-29946-9_27"},{"key":"19_CR14","unstructured":"Duff, M.O.: Optimal Learning Computational Procedures for Bayes-adaptive Markov Decision Processes. PhD thesis, University of Massachusetts at Amherst (2002)"},{"key":"19_CR15","doi-asserted-by":"crossref","unstructured":"Efron, B., Tibshirani, R.J.: An Introduction to the Bootstrap. Monographs on Statistics & Applied Probability, vol.\u00a057. Chapmann & Hall, ISBN 0412042312 (November 1993)","DOI":"10.1007\/978-1-4899-4541-9"},{"key":"19_CR16","unstructured":"Fard, M.M., Pineau, J.: PAC-Bayesian model selection for reinforcement learning. In: NIPS 2010 (2010)"},{"key":"19_CR17","unstructured":"Furmston, T., Barber, D.: Variational methods for reinforcement learning. In: Teh, Y.W., Titterington, M. (eds.) 
Proceedings of the 13th International Conference on Artificial Intelligence and Statistics (AISTATS). JMLR: W&CP, vol.\u00a09, pp. 241\u2013248"},{"key":"19_CR18","volume-title":"Multi-armed Bandit Allocation Indices","author":"C.J. Gittins","year":"1989","unstructured":"Gittins, C.J.: Multi-armed Bandit Allocation Indices. John Wiley & Sons, New Jersey (1989)"},{"key":"19_CR19","first-page":"1563","volume":"11","author":"T. Jaksch","year":"2010","unstructured":"Jaksch, T., Ortner, R., Auer, P.: Near-optimal regret bounds for reinforcement learning. Journal of Machine Learning Research\u00a011, 1563\u20131600 (2010)","journal-title":"Journal of Machine Learning Research"},{"key":"19_CR20","unstructured":"Kaelbling, L.P.: Learning in Embedded Systems. PhD thesis, Dept of Computer Science, Stanford (1990)"},{"key":"19_CR21","first-page":"260","volume-title":"Proc. 15th International Conf. on Machine Learning","author":"M. Kearns","year":"1998","unstructured":"Kearns, M., Singh, S.: Near-optimal reinforcement learning in polynomial time. In: Proc. 15th International Conf. on Machine Learning, pp. 260\u2013268. Morgan Kaufmann, San Francisco (1998)"},{"key":"19_CR22","unstructured":"Ng, A.Y., Russell, S.: Algorithms for inverse reinforcement learning. In: Proc. 17th International Conf. on Machine Learning, pp. 663\u2013670. Morgan Kaufmann (2000)"},{"key":"19_CR23","doi-asserted-by":"publisher","first-page":"697","DOI":"10.1145\/1143844.1143932","volume-title":"ICML 2006","author":"P. Poupart","year":"2006","unstructured":"Poupart, P., Vlassis, N., Hoey, J., Regan, K.: An analytic solution to discrete Bayesian reinforcement learning. In: ICML 2006, pp. 697\u2013704. ACM Press, New York (2006)"},{"issue":"3","key":"19_CR24","doi-asserted-by":"publisher","first-page":"1116","DOI":"10.1137\/050642885","volume":"46","author":"L.C.G. Rogers","year":"2008","unstructured":"Rogers, L.C.G.: Pathwise stochastic optimal control. 
SIAM Journal on Control and Optimization\u00a046(3), 1116\u20131132 (2008)","journal-title":"SIAM Journal on Control and Optimization"},{"key":"19_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1007\/978-3-642-23808-6_3","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"C.A. Rothkopf","year":"2011","unstructured":"Rothkopf, C.A., Dimitrakakis, C.: Preference Elicitation and Inverse Reinforcement Learning. In: Gunopulos, D., Hofmann, T., Malerba, D., Vazirgiannis, M. (eds.) ECML PKDD 2011. LNCS, vol.\u00a06913, pp. 34\u201348. Springer, Heidelberg (2011)"},{"key":"19_CR26","doi-asserted-by":"crossref","unstructured":"Snel, M., Whiteson, S.: Multi-Task Reinforcement Learning: Shaping and Feature Selection. In: EWRL 2011 (2011)","DOI":"10.1007\/978-3-642-29946-9_24"},{"issue":"8","key":"19_CR27","doi-asserted-by":"publisher","first-page":"1309","DOI":"10.1016\/j.jcss.2007.08.009","volume":"74","author":"A.L. Strehl","year":"2008","unstructured":"Strehl, A.L., Littman, M.L.: An analysis of model-based interval estimation for Markov decision processes. Journal of Computer and System Sciences\u00a074(8), 1309\u20131331 (2008)","journal-title":"Journal of Computer and System Sciences"},{"key":"19_CR28","first-page":"2413","volume":"10","author":"A.L. Strehl","year":"2009","unstructured":"Strehl, A.L., Li, L., Littman, M.L.: Reinforcement learning in finite MDPs: PAC analysis. The Journal of Machine Learning Research\u00a010, 2413\u20132444 (2009)","journal-title":"The Journal of Machine Learning Research"},{"key":"19_CR29","unstructured":"Strens, M.: A bayesian framework for reinforcement learning. In: ICML 2000, pp. 943\u2013950. Citeseer (2000)"},{"key":"19_CR30","doi-asserted-by":"publisher","first-page":"956","DOI":"10.1145\/1102351.1102472","volume-title":"ICML 2005","author":"T. 
Wang","year":"2005","unstructured":"Wang, T., Lizotte, D., Bowling, M., Schuurmans, D.: Bayesian sparse sampling for on-line reward optimization. In: ICML 2005, pp. 956\u2013963. ACM, New York (2005)"},{"key":"19_CR31","unstructured":"Wyatt, J.: Exploration control in reinforcement learning using optimistic model selection. In: Danyluk, A., Brodley, C. (eds.) Proceedings of the Eighteenth International Conference on Machine Learning (2001)"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-29946-9_19.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,5,4]],"date-time":"2021-05-04T11:22:26Z","timestamp":1620127346000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-29946-9_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642299452","9783642299469"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-29946-9_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}