{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,16]],"date-time":"2025-04-16T11:46:36Z","timestamp":1744803996467},"reference-count":28,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2015,4,1]],"date-time":"2015-04-01T00:00:00Z","timestamp":1427846400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["60736027","61174072","61203039","61222302","91224008","U1301254"],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Specialized Research Fund","award":["20120002120009"]},{"name":"New Star of Science and Technology","award":["xx2014B056"]},{"name":"TNList Funding"},{"name":"TNList Cross-Discipline Foundation"},{"name":"TNList Funding for Excellent Young Scholars"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Automatica"],"published-print":{"date-parts":[[2015,4]]},"DOI":"10.1016\/j.automatica.2015.01.006","type":"journal-article","created":{"date-parts":[[2015,2,11]],"date-time":"2015-02-11T13:30:49Z","timestamp":1423661449000},"page":"29-35","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":16,"special_numbering":"C","title":["Parameterized Markov decision process and its application to service rate control"],"prefix":"10.1016","volume":"54","author":[{"given":"Li","family":"Xia","sequence":"first","affiliation":[]},{"given":"Qing-Shan","family":"Jia","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.automatica.2015.01.006_br000005","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1613\/jair.806","article-title":"Infinite-horizon policy-gradient estimation","volume":"15","author":"Baxter","year":"2001","journal-title":"Journal of Artificial Intelligence Research"},{"key":"10.1016\/j.automatica.2015.01.006_br000010","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1287\/opre.35.2.215","article-title":"Aggregation in dynamic programming","volume":"35","author":"Bean","year":"1987","journal-title":"Operations Research"},{"key":"10.1016\/j.automatica.2015.01.006_br000015","doi-asserted-by":"crossref","first-page":"2471","DOI":"10.1016\/j.automatica.2009.07.008","article-title":"Natural actor-critic algorithms","volume":"45","author":"Bhatnagar","year":"2009","journal-title":"Automatica"},{"key":"10.1016\/j.automatica.2015.01.006_br000020","doi-asserted-by":"crossref","unstructured":"Busoniu, L., De Schutter, B., Babuska, R., & Ernst, D. 2010. Using prior knowledge to accelerate online least-squares policy iteration. In Proceedings 2010 IEEE international conference on automation, quality and testing, robotics (AQTR-10), 28 May, Cluj-Napoca, Romania.","DOI":"10.1109\/AQTR.2010.5520917"},{"key":"10.1016\/j.automatica.2015.01.006_br000025","series-title":"Stochastic learning and optimization\u2014a sensitivity-based approach","author":"Cao","year":"2007"},{"key":"10.1016\/j.automatica.2015.01.006_br000030","doi-asserted-by":"crossref","first-page":"1382","DOI":"10.1109\/9.633827","article-title":"Perturbation realization, potentials, and sensitivity analysis of Markov processes","volume":"42","author":"Cao","year":"1997","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.automatica.2015.01.006_br000035","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1007\/s11768-004-0025-7","article-title":"Performance sensitivities for parameterized Markov systems","volume":"2","author":"Cao","year":"2004","journal-title":"Journal of Control Theory and Applications"},{"key":"10.1016\/j.automatica.2015.01.006_br000040","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1023\/A:1008349927281","article-title":"Simulation budget allocation for further enhancing the efficiency of ordinal optimization","volume":"10","author":"Chen","year":"2000","journal-title":"Discrete Event Dynamic System: Theory Applications"},{"key":"10.1016\/j.automatica.2015.01.006_br000045","series-title":"Handbook of Markov decision processes: methods and applications","author":"Feinberg","year":"2002"},{"key":"10.1016\/j.automatica.2015.01.006_br000050","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1145\/84537.84552","article-title":"Likelihood ratio gradient estimation for stochastic systems","volume":"33","author":"Glynn","year":"1990","journal-title":"Communications of the ACM"},{"key":"10.1016\/j.automatica.2015.01.006_br000055","doi-asserted-by":"crossref","first-page":"252","DOI":"10.1287\/opre.15.2.254","article-title":"Closed queueing systems with exponential servers","volume":"15","author":"Gordon","year":"1967","journal-title":"Operations Research"},{"key":"10.1016\/j.automatica.2015.01.006_br000060","series-title":"Continuous-time Markov decision processes: theory and applications","author":"Guo","year":"2009"},{"issue":"3","key":"10.1016\/j.automatica.2015.01.006_br000065","doi-asserted-by":"crossref","first-page":"756","DOI":"10.1287\/opre.1090.0738","article-title":"Series expansions for continuous-time Markov processes","volume":"58","author":"Heidergott","year":"2010","journal-title":"Operations Research"},{"issue":"3","key":"10.1016\/j.automatica.2015.01.006_br000070","doi-asserted-by":"crossref","first-page":"381","DOI":"10.1017\/S0269964807000034","article-title":"Series expansions for finite-state Markov chains","volume":"21","author":"Heidergott","year":"2007","journal-title":"Probability in the Engineering and Informational Sciences"},{"key":"10.1016\/j.automatica.2015.01.006_br000075","doi-asserted-by":"crossref","first-page":"559","DOI":"10.1007\/BF00933971","article-title":"Optimization and perturbation analysis of queueing networks","volume":"40","author":"Ho","year":"1983","journal-title":"Journal of Optimization Theory and Applications"},{"key":"10.1016\/j.automatica.2015.01.006_br000080","series-title":"Perturbation analysis of discrete event systems","author":"Ho","year":"1991"},{"key":"10.1016\/j.automatica.2015.01.006_br000085","doi-asserted-by":"crossref","first-page":"115","DOI":"10.1287\/opre.1050.0237","article-title":"Discrete optimization via simulation using COMPASS","volume":"54","author":"Hong","year":"2006","journal-title":"Operations Research"},{"key":"10.1016\/j.automatica.2015.01.006_br000090","doi-asserted-by":"crossref","first-page":"333","DOI":"10.1109\/TAC.2010.2052697","article-title":"On state aggregation to approximate complex value functions in large-scale Markov decision processes","volume":"56","author":"Jia","year":"2011","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.automatica.2015.01.006_br000095","first-page":"1107","article-title":"Least-squares policy iteration","volume":"4","author":"Lagoudakis","year":"2003","journal-title":"Journal of Machine Learning Research"},{"key":"10.1016\/j.automatica.2015.01.006_br000100","doi-asserted-by":"crossref","first-page":"191","DOI":"10.1109\/9.905687","article-title":"Simulation-based optimization of Markov reward processes","volume":"46","author":"Marbach","year":"2001","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.automatica.2015.01.006_br000105","series-title":"Markov decision processes: discrete stochastic dynamic programming","author":"Puterman","year":"1994"},{"key":"10.1016\/j.automatica.2015.01.006_br000110","doi-asserted-by":"crossref","first-page":"332","DOI":"10.1109\/9.119632","article-title":"Multivariate stochastic approximation using a simultaneous perturbation gradient approximation","volume":"37","author":"Spall","year":"1992","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.automatica.2015.01.006_br000115","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume":"12","author":"Sutton","year":"2000","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.automatica.2015.01.006_br000120","doi-asserted-by":"crossref","unstructured":"Wang, D.X., & Cao, X.R. (2011). Event-based optimization for POMDP and its application in portfolio management. In Proceedings of the 18th IFAC world congress (pp. 3228\u20133233). Milano, Italy.","DOI":"10.3182\/20110828-6-IT-1002.01514"},{"key":"10.1016\/j.automatica.2015.01.006_br000125","doi-asserted-by":"crossref","first-page":"133","DOI":"10.1007\/s10626-013-0167-1","article-title":"Event-based optimization of admission control in open queueing networks","volume":"24","author":"Xia","year":"2014","journal-title":"Discrete Event Dynamic Systems: Theory and Applications"},{"key":"10.1016\/j.automatica.2015.01.006_br000130","doi-asserted-by":"crossref","first-page":"546","DOI":"10.1016\/j.ejor.2014.01.038","article-title":"Service rate control of closed Jackson networks from game theoretic perspective","volume":"237","author":"Xia","year":"2014","journal-title":"European Journal of Operational Research"},{"key":"10.1016\/j.automatica.2015.01.006_br000135","doi-asserted-by":"crossref","unstructured":"Xia, L., & Jia, Q.S. (2013). Policy iteration for parameterized Markov decision processes and its application. In Proceedings of the 9th Asian Control Conference (ASCC2013) (pp. 1\u20136). June 23\u201326, 2013, Istanbul, Turkey.","DOI":"10.1109\/ASCC.2013.6606023"},{"key":"10.1016\/j.automatica.2015.01.006_br000140","doi-asserted-by":"crossref","first-page":"1051","DOI":"10.1109\/TAC.2012.2218145","article-title":"Max\u2013min optimality of service rate control in closed queueing networks","volume":"58","author":"Xia","year":"2013","journal-title":"IEEE Transactions on Automatic Control"}],"container-title":["Automatica"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0005109815000072?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0005109815000072?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2019,8,20]],"date-time":"2019-08-20T17:20:57Z","timestamp":1566321657000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0005109815000072"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,4]]},"references-count":28,"alternative-id":["S0005109815000072"],"URL":"https:\/\/doi.org\/10.1016\/j.automatica.2015.01.006","relation":{},"ISSN":["0005-1098"],"issn-type":[{"value":"0005-1098","type":"print"}],"subject":[],"published":{"date-parts":[[2015,4]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Parameterized Markov decision process and its application to service rate control","name":"articletitle","label":"Article Title"},{"value":"Automatica","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.automatica.2015.01.006","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"Copyright \u00a9 2015 Elsevier Ltd. All rights reserved.","name":"copyright","label":"Copyright"}]}}