{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T15:29:50Z","timestamp":1725550190976},"publisher-location":"Berlin, Heidelberg","reference-count":37,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642116872"},{"type":"electronic","value":"9783642116889"}],"license":[{"start":{"date-parts":[[2010,1,1]],"date-time":"2010-01-01T00:00:00Z","timestamp":1262304000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2010,1,1]],"date-time":"2010-01-01T00:00:00Z","timestamp":1262304000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010]]},"DOI":"10.1007\/978-3-642-11688-9_18","type":"book-chapter","created":{"date-parts":[[2010,3,22]],"date-time":"2010-03-22T05:34:40Z","timestamp":1269236080000},"page":"475-510","source":"Crossref","is-referenced-by-count":47,"title":["Traffic Light Control by Multiagent Reinforcement Learning Systems"],"prefix":"10.1007","author":[{"given":"Bram","family":"Bakker","sequence":"first","affiliation":[]},{"given":"Shimon","family":"Whiteson","sequence":"additional","affiliation":[]},{"given":"Leon","family":"Kester","sequence":"additional","affiliation":[]},{"given":"Frans C. A.","family":"Groen","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"issue":"3","key":"18_CR1","doi-asserted-by":"publisher","first-page":"278","DOI":"10.1061\/(ASCE)0733-947X(2003)129:3(278)","volume":"129","author":"B. Abdulhai","year":"2003","unstructured":"Abdulhai, B., et al.: Reinforcement Learning for True Adaptive Traffic Signal Control. ASCE Journal of Transportation Engineering\u00a0129(3), 278\u2013285 (2003)","journal-title":"ASCE Journal of Transportation Engineering"},{"key":"18_CR2","first-page":"103","volume":"13","author":"A.W. Moore","year":"1993","unstructured":"Moore, A.W., Atkenson, C.G.: Prioritized Sweeping: Reinforcement Learning with less data and less time. Machine Learning\u00a013, 103\u2013130 (1993)","journal-title":"Machine Learning"},{"key":"18_CR3","unstructured":"Bakker, B., Steingrover, M., Schouten, R., Nijhuis, E., Kester, L.: Cooperative multi-agent reinforcement learning of traffic lights. In: Proceedings of the Workshop on Cooperative Multi-Agent Learning, European Conference on Machine Learning, ECML 2005 (2005)"},{"key":"18_CR4","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1016\/0004-3702(94)00011-O","volume":"72","author":"A.G. Barto","year":"1995","unstructured":"Barto, A.G., Bradtke, S.J., Singh, S.P.: Learning to act using real-time dynamic programming. Artificial Intelligence\u00a072, 81\u2013138 (1995)","journal-title":"Artificial Intelligence"},{"key":"18_CR5","volume-title":"Dynamic Programming","author":"R.E. Bellman","year":"1957","unstructured":"Bellman, R.E.: Dynamic Programming. Princeton University Press, Princeton (1957)"},{"key":"18_CR6","unstructured":"Cassandra, T.: Exact and Approximate Algorithms for Partially Observable Markov Decision Processes. PhD thesis, Brown University (1998)"},{"key":"18_CR7","unstructured":"Chiu, S.: Adaptive Traffic Signal Control Using Fuzzy Logic. In: Proceedings of the IEEE Intelligent Vehicles Symposium, pp. 98\u2013107 (1992)"},{"key":"18_CR8","unstructured":"Crick, C., Pfeffer, A.: Loopy belief propagation as a basis for communication in sensor networks. In: Proceedings of Uncertainty in Artificial Intelligence, UAI (2003)"},{"key":"18_CR9","unstructured":"Foy, M.D., Benekohal, R.F., Goldberg, D.E.: Signal timing determination using genetic algorithms. Transportation Research Record No. 1365, pp. 108\u2013115"},{"key":"18_CR10","unstructured":"Guestrin, C., Lagoudakis, M.G., Parr, R.: Coordinated reinforcement learning. In: Proceedings Nineteenth International Conference on Machine Learning (ICML), pp. 227\u2013234 (2002)"},{"key":"18_CR11","first-page":"33","volume":"13","author":"M. Hauskrecht","year":"2000","unstructured":"Hauskrecht, M.: Value-function approximations for partially observable markov decision processes. J. of AI Research\u00a013, 33\u201394 (2000)","journal-title":"J. of AI Research"},{"key":"18_CR12","unstructured":"Jaakkola, T., Singh, S.P., Jordan, M.I.: Monte-carlo reinforcement learning in non-Markovian decision problems. Advances in Neural Information Processing Systems\u00a07 (1995)"},{"issue":"1-2","key":"18_CR13","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1016\/S0004-3702(98)00023-X","volume":"101","author":"L.P. Kaelbling","year":"1998","unstructured":"Kaelbling, L.P., Littman, M.L., Cassandra, A.R.: Planning and acting in partially observable stochastic domains. Artificial Intelligence\u00a0101(1-2), 99\u2013134 (1998)","journal-title":"Artificial Intelligence"},{"key":"18_CR14","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"L.P. Kaelbling","year":"1996","unstructured":"Kaelbling, L.P., Littman, M.L., Moore, A.W.: Reinforcement learning: A survey. Journal of Artificial Intelligence Research\u00a04, 237\u2013285 (1996)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"18_CR15","first-page":"1789","volume":"7","author":"J.R. Kok","year":"2006","unstructured":"Kok, J.R., Vlassis, N.: Collaborative Multiagent Reinforcement Learning by Payoff Propagation. J. Mach. Learn. Res.\u00a07, 1789\u20131828 (2006)","journal-title":"J. Mach. Learn. Res."},{"key":"18_CR16","doi-asserted-by":"publisher","first-page":"498","DOI":"10.1109\/18.910572","volume":"47","author":"F.R. Kschischang","year":"2001","unstructured":"Kschischang, F.R., Frey, B.J., Loeliger, H.A.: Factor graphs and the sum-product algorithm. IEEE Transactions on Information Theory\u00a047, 498\u2013519 (2001)","journal-title":"IEEE Transactions on Information Theory"},{"key":"18_CR17","doi-asserted-by":"crossref","unstructured":"Liu, J., Chen, R.: Sequential Monte Carlo methods for dynamic systems. Journal of the American Statistical Association\u00a093 (1998)","DOI":"10.1080\/01621459.1998.10473765"},{"key":"18_CR18","volume-title":"Machine learning","author":"T.M. Mitchell","year":"1997","unstructured":"Mitchell, T.M.: Machine learning. McGraw-Hill, New York (1997)"},{"issue":"6","key":"18_CR19","first-page":"526","volume":"17","author":"M. Shoufeng","year":"2002","unstructured":"Shoufeng, M., et al.: Agent-based learning control method for urban traffic signal of single intersection. Journal of Systems Engineering\u00a017(6), 526\u2013530 (2002)","journal-title":"Journal of Systems Engineering"},{"key":"18_CR20","doi-asserted-by":"publisher","first-page":"1071","DOI":"10.1287\/opre.21.5.1071","volume":"21","author":"R.D. Smallwood","year":"1973","unstructured":"Smallwood, R.D., Sondik, E.J.: The optimal control of partially observable Markov processes over a finite horizon. Operations Research\u00a021, 1071\u20131088 (1973)","journal-title":"Operations Research"},{"key":"18_CR21","doi-asserted-by":"crossref","unstructured":"Spaan, M.T.J., Vlassis, N.: A point-based POMDP algorithm for robot planning. In: Proceedings of 2004 IEEE International Conference on Robotics and Automation, ICRA (2004)","DOI":"10.1109\/ROBOT.2004.1307420"},{"issue":"3","key":"18_CR22","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1016\/S0968-090X(97)00012-0","volume":"5","author":"J.C. Spall","year":"1997","unstructured":"Spall, J.C., Chin, D.C.: Traffic-Responsive Signal Timing for System-wide Traffic Control. Transportation Research Part C: Emerging Technologies\u00a05(3), 153\u2013163 (1997)","journal-title":"Transportation Research Part C: Emerging Technologies"},{"key":"18_CR23","unstructured":"Steingr\u00f6ver, M., Schouten, R., Peelen, S., Nijhuis, E., Bakker, B.: Reinforcement learning of traffic light controllers adapting to traffic congestion. In: Proceedings of the Belgium-Netherlands Artificial Intelligence Conference, BNAIC 2005 (2005)"},{"key":"18_CR24","unstructured":"Steingr\u00f6ver, M., Schouten, R.: Reinforcement Learning of Traffic Light Controllers under Partial Observability. MSc Thesis, Informatics Institute, Universiteit van Amsterdam (2007)"},{"key":"18_CR25","doi-asserted-by":"publisher","first-page":"576","DOI":"10.1016\/0022-247X(65)90027-2","volume":"12","author":"C.T. Striebel","year":"1965","unstructured":"Striebel, C.T.: Sufficient Statistics in the Optimal Control of. Stochastic Systems. Journal of Mathematical Analysis and Applications\u00a012, 576\u2013592 (1965)","journal-title":"Journal of Mathematical Analysis and Applications"},{"key":"18_CR26","volume-title":"Reinforcement learning: An introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: An introduction. MIT Press, Cambridge (1998)"},{"key":"18_CR27","volume-title":"Proceedings of the Seventh International Conference on Machine Learning (ICML)","author":"R.S. Sutton","year":"1990","unstructured":"Sutton, R.S.: Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In: Proceedings of the Seventh International Conference on Machine Learning (ICML). Morgan Kaufmann, Austin (1990)"},{"key":"18_CR28","unstructured":"Taale, H., B\u00e4ck, T., Preuss, M., Eiben, A.E., de Graaf, J.M., Schippers, C.A.: Optimizing traffic light controllers by means of evolutionary algorithms. In: Proceedings of EUFIT 1998 (1998)"},{"key":"18_CR29","unstructured":"Tan, K.K., Khalid, M., Yusof, R.: Intelligent traffic lights control by fuzzy logic. Malaysian Journal of Computer Science\u00a09-2 (1995)"},{"key":"18_CR30","unstructured":"Thorpe, T.L., Anderson, C.: Traffic light control using Sarsa with three state representations. Technical report, IBM Cooperation (1996)"},{"key":"18_CR31","volume-title":"Synthesis Lectures in Artificial Intelligence and Machine Learning","author":"N. Vlassis","year":"2007","unstructured":"Vlassis, N.: A Concise Introduction to Multiagent Systems and Distributed Artificial Intelligence. In: Synthesis Lectures in Artificial Intelligence and Machine Learning. Morgan & Claypool Publishers, San Francisco (2007)"},{"issue":"3","key":"18_CR32","doi-asserted-by":"publisher","first-page":"439","DOI":"10.1287\/opre.42.3.439","volume":"42","author":"C.C. White","year":"1994","unstructured":"White, C.C., Scherer, W.T.: Finite memory suboptimal design for partially observed Markov decision processes. Operations Research\u00a042(3), 439\u2013455 (1994)","journal-title":"Operations Research"},{"key":"18_CR33","unstructured":"Wiering, M.: Multi-Agent Reinforcement Learning for Traffic Light Control. In: Proc. 17th International Conf. on Machine Learning (ICML), pp. 1151\u20131158 (2000)"},{"key":"18_CR34","unstructured":"Wiering, M., van Veenen, J., Vreeken, J., Koopman, A.: Intelligent traffic light control. Technical report, Dept.\u00a0of Information and Computing Sciences, Universiteit Utrecht (2004)"},{"key":"18_CR35","unstructured":"Wiering, M., Vreeken, J., van Veenen, J., Koopman, A.: Simulation and optimization of traffic in a city. In: IEEE Intelligent Vehicles symposium (IV 2004) (2004)"},{"key":"18_CR36","unstructured":"Yedidia, J.S., Freeman, W.T., Weiss, Y.: Understanding belief propagation and its generalizations. In: Exploring Artificial Intelligence in the New Millennium, ch.\u00a08, pp. 239\u2013269 (2003)"},{"key":"18_CR37","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1613\/jair.305","volume":"5","author":"N.L. Zhang","year":"1996","unstructured":"Zhang, N.L., Poole, D.: Exploiting causal independence in Bayesian network inference. Journal of Artificial Intelligence Research\u00a05, 301\u2013328 (1996)","journal-title":"Journal of Artificial Intelligence Research"}],"container-title":["Studies in Computational Intelligence","Interactive Collaborative Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-11688-9_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,6]],"date-time":"2023-02-06T23:22:59Z","timestamp":1675725779000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-11688-9_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010]]},"ISBN":["9783642116872","9783642116889"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-11688-9_18","relation":{},"ISSN":["1860-949X","1860-9503"],"issn-type":[{"type":"print","value":"1860-949X"},{"type":"electronic","value":"1860-9503"}],"subject":[],"published":{"date-parts":[[2010]]}}}