{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T07:31:52Z","timestamp":1725521512283},"publisher-location":"Berlin, Heidelberg","reference-count":18,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540897217"},{"type":"electronic","value":"9783540897224"}],"license":[{"start":{"date-parts":[[2008,1,1]],"date-time":"2008-01-01T00:00:00Z","timestamp":1199145600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2008]]},"DOI":"10.1007\/978-3-540-89722-4_7","type":"book-chapter","created":{"date-parts":[[2008,11,26]],"date-time":"2008-11-26T13:57:14Z","timestamp":1227707834000},"page":"82-95","source":"Crossref","is-referenced-by-count":1,"title":["Evaluation of Batch-Mode Reinforcement Learning Methods for Solving DEC-MDPs with Changing Action Sets"],"prefix":"10.1007","author":[{"given":"Thomas","family":"Gabel","sequence":"first","affiliation":[]},{"given":"Martin","family":"Riedmiller","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"7_CR1","unstructured":"Beasley, J.: OR-Library (2005), \n \n http:\/\/people.brunel.ac.uk\/~mastjjb\/jeb\/info.html"},{"key":"7_CR2","first-page":"302","volume-title":"Proceedings of AAMAS 2004","author":"R. Becker","year":"2004","unstructured":"Becker, R., Zilberstein, S., Lesser, V.: Decentralized Markov Decision Processes with Event-Driven Interactions. In: Proceedings of AAMAS 2004, pp. 302\u2013309. ACM Press, New York (2004)"},{"key":"7_CR3","first-page":"423","volume":"22","author":"R. Becker","year":"2004","unstructured":"Becker, R., Zilberstein, S., Lesser, V., Goldman, C.: Solving Transition Independent Decentralized MDPs. Journal of AI Research\u00a022, 423\u2013455 (2004)","journal-title":"Journal of AI Research"},{"issue":"4","key":"7_CR4","doi-asserted-by":"publisher","first-page":"819","DOI":"10.1287\/moor.27.4.819.297","volume":"27","author":"D. Bernstein","year":"2002","unstructured":"Bernstein, D., Givan, D., Immerman, N., Zilberstein, S.: The Complexity of Decentralized Control of Markov Decision Processes. Mathematics of Operations Research\u00a027(4), 819\u2013840 (2002)","journal-title":"Mathematics of Operations Research"},{"key":"7_CR5","first-page":"478","volume-title":"Proceedings of IJCAI 1999","author":"C. Boutilier","year":"1999","unstructured":"Boutilier, C.: Sequential Optimality and Coordination in Multiagent Systems. In: Proceedings of IJCAI 1999, Sweden, pp. 478\u2013485. Morgan Kaufmann, San Francisco (1999)"},{"key":"7_CR6","first-page":"11","volume":"19","author":"R. Brafman","year":"2003","unstructured":"Brafman, R., Tennenholtz, M.: Learning to Cooperate Efficiently: A Model-Based Approach. Journal of AI Research\u00a019, 11\u201323 (2003)","journal-title":"Journal of AI Research"},{"issue":"2","key":"7_CR7","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1007\/s10458-006-9010-5","volume":"15","author":"O. Buffet","year":"2007","unstructured":"Buffet, O., Dutech, A., Charpillet, F.: Shaping Multi-Agent Systems with Gradient Reinforcement Learning. Autonomous Agent and Multi-Agent System Journal\u00a015(2), 197\u2013220 (2007)","journal-title":"Autonomous Agent and Multi-Agent System Journal"},{"key":"7_CR8","unstructured":"Ernst, D., Geurts, P., Wehenkel, L.: Tree-Based Batch Mode Reinforcement Learning. Journal of Machine Learning Research\u00a0(6), 504\u2013556 (2005)"},{"key":"7_CR9","unstructured":"Gabel, T., Riedmiller, M.: Adaptive Reactive Job-Shop Scheduling with Learning Agents. International Journal of Information Technology and Intelligent Computing\u00a02(4) (2007)"},{"key":"7_CR10","unstructured":"Gabel, T., Riedmiller, M.: Reinforcement Learning for DEC-MDPs with Changing Action Sets and Partially Ordered Dependencies. In: Proceedings of AAMAS 2008, Estoril, Portugal, pp. 1333\u20131336. IFAAMAS (2008)"},{"key":"7_CR11","first-page":"137","volume-title":"Proceedings of AAMAS 2003","author":"C. Goldman","year":"2003","unstructured":"Goldman, C., Zilberstein, S.: Optimizing Information Exchange in Cooperative Multi-Agent Systems. In: Proceedings of AAMAS 2003, Melbourne, Australia, pp. 137\u2013144. ACM Press, New York (2003)"},{"key":"7_CR12","first-page":"535","volume-title":"Proceedings of ICML 2000","author":"M. Lauer","year":"2000","unstructured":"Lauer, M., Riedmiller, M.: An Algorithm for Distributed Reinforcement Learning in Cooperative Multi-Agent Systems. In: Proceedings of ICML 2000, Stanford, USA, pp. 535\u2013542. AAAI Press, Menlo Park (2000)"},{"key":"7_CR13","volume-title":"Scheduling. Theory, Algorithms, and Systems","author":"M. Pinedo","year":"2002","unstructured":"Pinedo, M.: Scheduling. Theory, Algorithms, and Systems. Prentice Hall, Englewood Cliffs (2002)"},{"key":"7_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1007\/11564096_32","volume-title":"Machine Learning: ECML 2005","author":"M. Riedmiller","year":"2005","unstructured":"Riedmiller, M.: Neural Fitted Q Iteration \u2013 First Experiences with a Data Efficient Neural Reinforcement Learning Method. In: Gama, J., Camacho, R., Brazdil, P.B., Jorge, A.M., Torgo, L. (eds.) ECML 2005. LNCS, vol.\u00a03720, pp. 317\u2013328. Springer, Heidelberg (2005)"},{"key":"7_CR15","doi-asserted-by":"crossref","unstructured":"Riedmiller, M., Braun, H.: A Direct Adaptive Method for Faster Backpropagation Learning: The RPROP Algorithm. In: Ruspini, H. (ed.) Proceedings of ICNN, San Francisco, USA, pp. 586\u2013591 (1993)","DOI":"10.1109\/ICNN.1993.298623"},{"key":"7_CR16","first-page":"1254","volume-title":"Proceedings of AAMAS 2004","author":"D. Szer","year":"2005","unstructured":"Szer, D., Charpillet, F.: Coordination through Mutual Notification in Cooperative Multiagent RL. In: Proceedings of AAMAS 2004, pp. 1254\u20131255. IEEE Computer Society, Los Alamitos (2005)"},{"key":"7_CR17","first-page":"1105","volume-title":"Proceedings of AAMAS 2005","author":"K. Verbeeck","year":"2005","unstructured":"Verbeeck, K., Nowe, A., Tuyls, K.: Coordinated Exploration in Multi-Agent Reinforcement Learning: An Application to Load-Balancing. In: Proceedings of AAMAS 2005, Utrecht, The Netherlands, pp. 1105\u20131106. ACM Press, New York (2005)"},{"key":"7_CR18","first-page":"279","volume":"8","author":"C. Watkins","year":"1992","unstructured":"Watkins, C., Dayan, P.: Q-Learning. Machine Learning\u00a08, 279\u2013292 (1992)","journal-title":"Machine Learning"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-89722-4_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,3,4]],"date-time":"2019-03-04T05:20:17Z","timestamp":1551676817000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-89722-4_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008]]},"ISBN":["9783540897217","9783540897224"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-89722-4_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2008]]}}}