{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T04:53:51Z","timestamp":1725684831158},"publisher-location":"Berlin, Heidelberg","reference-count":23,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642299452"},{"type":"electronic","value":"9783642299469"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-29946-9_27","type":"book-chapter","created":{"date-parts":[[2012,5,18]],"date-time":"2012-05-18T17:01:49Z","timestamp":1337360509000},"page":"273-284","source":"Crossref","is-referenced-by-count":22,"title":["Bayesian Multitask Inverse Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Christos","family":"Dimitrakakis","sequence":"first","affiliation":[]},{"given":"Constantin A.","family":"Rothkopf","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"27_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Ng, A.Y.: Apprenticeship learning via inverse reinforcement learning. In: ICML 2004 (2004)","DOI":"10.1145\/1015330.1015430"},{"key":"27_CR2","unstructured":"Babes, M., Marivate, V., Littman, M., Subramanian, K.: Apprenticeship learning about multiple intentions. In: ICML 2011 (2011)"},{"key":"27_CR3","unstructured":"Birlutiu, A., Groot, P., Heskes, T.: Multi-task preference learning with gaussian processes. In: ESANN 2009, pp. 123\u2013128 (2009)"},{"key":"27_CR4","unstructured":"Boutilier, C.: A POMDP formulation of preference elicitation problems. In: AAAI 2002, pp. 239\u2013246 (2002)"},{"key":"27_CR5","first-page":"691","volume":"12","author":"J. Choi","year":"2011","unstructured":"Choi, J., Kim, K.-E.: Inverse reinforcement learning in partially observable environments. Journal of Machine Learning Research\u00a012, 691\u2013730 (2011)","journal-title":"Journal of Machine Learning Research"},{"key":"27_CR6","doi-asserted-by":"crossref","unstructured":"Chu, W., Ghahramani, Z.: Preference learning with Gaussian processes. In: ICML 2005 (2005)","DOI":"10.1145\/1102351.1102369"},{"key":"27_CR7","doi-asserted-by":"crossref","unstructured":"Coates, A., Abbeel, P., Ng, A.Y.: Learning for control from multiple demonstrations. In: ICML 2008, pp. 144\u2013151. ACM (2008)","DOI":"10.1145\/1390156.1390175"},{"key":"27_CR8","unstructured":"Dearden, R., Friedman, N., Russell, S.J.: Bayesian Q-learning. In: AAAI\/IAAI, pp. 761\u2013768 (1998)"},{"key":"27_CR9","doi-asserted-by":"crossref","unstructured":"Dimitrakakis, C.: Robust Bayesian reinforcement learning through tight lower bounds. In: EWRL 2011 (2011)","DOI":"10.1007\/978-3-642-29946-9_19"},{"key":"27_CR10","unstructured":"Doshi-Velez, F., Wingate, D., Roy, N., Tenenbaum, J.: Nonparametric Bayesian policy priors for reinforcement learning. In: NIPS 2010, pp. 532\u2013540 (2010)"},{"issue":"4","key":"27_CR11","doi-asserted-by":"publisher","first-page":"615","DOI":"10.1214\/aos\/1176342752","volume":"2","author":"T.S. Ferguson","year":"1974","unstructured":"Ferguson, T.S.: Prior distributions on spaces of probability measures. The Annals of Statistics\u00a02(4), 615\u2013629 (1974) ISSN 00905364","journal-title":"The Annals of Statistics"},{"key":"27_CR12","doi-asserted-by":"crossref","unstructured":"Geweke, J.: Bayesian inference in econometric models using Monte Carlo integration. Econometrica: Journal of the Econometric Society, 1317\u20131339 (1989)","DOI":"10.2307\/1913710"},{"key":"27_CR13","unstructured":"Heskes, T.: Solving a huge number of similar tasks: a combination of multi-task learning and a hierarchical Bayesian approach. In: ICML 1998, pp. 233\u2013241. Citeseer (1998)"},{"key":"27_CR14","unstructured":"Lazaric, A., Ghavamzadeh, M.: Bayesian multi-task reinforcement learning. In: ICML 2010 (2010)"},{"key":"27_CR15","doi-asserted-by":"crossref","unstructured":"Natarajan, S., Kunapuli, G., Judah, K., Tadepalli, P., Kersting, K., Shavlik, J.: Multi-agent inverse reinforcement learning. In: ICMLA 2010, pp. 395\u2013400. IEEE (2010)","DOI":"10.1109\/ICMLA.2010.65"},{"key":"27_CR16","unstructured":"Ng, A.Y., Russell, S.: Algorithms for inverse reinforcement learning. In: ICML 2000, pp. 663\u2013670. Morgan Kaufmann (2000)"},{"key":"27_CR17","volume-title":"Markov Decision Processes : Discrete Stochastic Dynamic Programming","author":"M.L. Puterman","year":"2005","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. John Wiley & Sons, New Jersey (2005)"},{"key":"27_CR18","unstructured":"Ramachandran, D., Amir, E.: Bayesian inverse reinforcement learning. In: IJCAI 2007, vol.\u00a051, p. 61801 (2007)"},{"key":"27_CR19","doi-asserted-by":"crossref","unstructured":"Robbins, H.: An empirical Bayes approach to statistics (1955)","DOI":"10.1525\/9780520313880-015"},{"key":"27_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1007\/978-3-642-23808-6_3","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"C.A. Rothkopf","year":"2011","unstructured":"Rothkopf, C.A., Dimitrakakis, C.: Preference Elicitation and Inverse Reinforcement Learning. In: Gunopulos, D., Hofmann, T., Malerba, D., Vazirgiannis, M. (eds.) ECML PKDD 2011. LNCS, vol.\u00a06913, pp. 34\u201348. Springer, Heidelberg (2011)"},{"key":"27_CR21","unstructured":"Syed, U., Schapire, R.E.: A game-theoretic approach to apprenticeship learning. In: NIPS 2008, vol.\u00a010 (2008)"},{"key":"27_CR22","doi-asserted-by":"crossref","unstructured":"Wilson, A., Fern, A., Ray, S., Tadepalli, P.: Multi-task reinforcement learning: a hierarchical Bayesian approach. In: ICML 2007, pp. 1015\u20131022. ACM (2007)","DOI":"10.1145\/1273496.1273624"},{"key":"27_CR23","unstructured":"Ziebart, B.D., Andrew Bagnell, J., Dey, A.K.: Modelling interaction via the principle of maximum causal entropy. In: ICML 2010, Haifa, Israel (2010)"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-29946-9_27.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,15]],"date-time":"2022-01-15T23:03:42Z","timestamp":1642287822000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-29946-9_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642299452","9783642299469"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-29946-9_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}