{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,6]],"date-time":"2024-09-06T08:45:19Z","timestamp":1725612319380},"reference-count":29,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2005,6,1]],"date-time":"2005-06-01T00:00:00Z","timestamp":1117584000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Discrete Event Dyn Syst"],"published-print":{"date-parts":[[2005,6]]},"DOI":"10.1007\/s10626-004-6211-4","type":"journal-article","created":{"date-parts":[[2005,2,18]],"date-time":"2005-02-18T18:22:11Z","timestamp":1108750931000},"page":"169-197","source":"Crossref","is-referenced-by-count":70,"title":["Basic Ideas for Event-Based Optimization of Markov Systems"],"prefix":"10.1007","volume":"15","author":[{"given":"Xi-Ren","family":"Cao","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"CR1","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1023\/A:1022140919877","volume":"13","author":"A. Barto","year":"2003","unstructured":"Barto, A., and Mahadevan, S. 2003. Recent advances in hierarchical reinforcement learning, special issue on reinforcement learning. Discret. Event Dyn. Syst. Theory Appl. 13: 41?77.","journal-title":"Discret. Event Dyn. Syst. Theory Appl."},{"key":"CR2","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1613\/jair.806","volume":"15","author":"J. Baxter","year":"2001","unstructured":"Baxter, J., and Bartlett, P. L. 2001. Infinite-horizon policy-gradient estimation. J. Artif. Intell. Res. 15: 319?350.","journal-title":"J. Artif. Intell. Res."},{"key":"CR3","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1613\/jair.807","volume":"15","author":"J. Baxter","year":"2001","unstructured":"Baxter, J., Bartlett, P. L., and Weaver, L. 2001. Experiments with infinite-horizon policy-gradient estimation. J. Artif. Intell. Res. 15: 351?381.","journal-title":"J. Artif. Intell. Res."},{"key":"CR4","volume-title":"Dynamic Programming and Optimal Control, Volume I and II","author":"D. P. Bertsekas","year":"1995","unstructured":"Bertsekas, D. P. 1995. Dynamic Programming and Optimal Control, Volume I and II. Belmont, MA: Athena Scientific."},{"key":"CR5","doi-asserted-by":"crossref","DOI":"10.1007\/BFb0035250","volume-title":"Realization Probabilities: The Dynamics of Queueing Systems","author":"X. R. Cao","year":"1994","unstructured":"Cao, X. R. 1994. Realization Probabilities: The Dynamics of Queueing Systems. New York: Springer-Verlag."},{"key":"CR6","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1023\/A:1008260528575","volume":"8","author":"X. R. Cao","year":"1998","unstructured":"Cao, X. R. 1998. The relation among potentials, perturbation analysis, Markov decision processes, and other topics. J. Discret. Event Dyn. Syst. 8: 71?87.","journal-title":"J. Discret. Event Dyn. Syst."},{"issue":"3","key":"CR7","doi-asserted-by":"crossref","first-page":"527","DOI":"10.1023\/A:1022634422482","volume":"100","author":"X. R. Cao","year":"1999","unstructured":"Cao, X. R. 1999. Single sample path based optimization of Markov chains. J. Optim. Theory Appl. 100(3): 527?548.","journal-title":"J. Optim. Theory Appl."},{"key":"CR8","doi-asserted-by":"crossref","first-page":"771","DOI":"10.1016\/S0005-1098(99)00207-1","volume":"36","author":"X. R. Cao","year":"2000","unstructured":"Cao, X. R. 2000. 
A unified approach to Markov decision problems and performance sensitivity analysis. Automatica 36: 771?774.","journal-title":"Automatica"},{"key":"CR9","doi-asserted-by":"crossref","first-page":"2129","DOI":"10.1109\/TAC.2004.838494","volume":"49","author":"X. R. Cao","year":"2004","unstructured":"Cao, X. R. 2004a. The potential structure of sample paths and performance sensitivities of Markov systems. IEEE Trans. Automat. Contr. 49: 2129?2142.","journal-title":"IEEE Trans. Automat. Contr."},{"key":"CR10","unstructured":"Cao, X. R. 2004b. A basic formula for on-line policy gradient algorithms, IEEE Trans. Automat. Contr. to appear."},{"key":"CR11","unstructured":"Cao, X. R. 2004c. Event-based optimization of Markov systems. Manuscript to be submitted."},{"key":"CR12","doi-asserted-by":"crossref","first-page":"1382","DOI":"10.1109\/9.633827","volume":"42","author":"X. R. Cao","year":"1997","unstructured":"Cao, X. R., and Chen, H. F. 1997. Perturbation realization, potentials and sensitivity analysis of Markov processes. IEEE Trans. Automat. Contr. 42: 1382?1393.","journal-title":"IEEE Trans. Automat. Contr."},{"key":"CR13","doi-asserted-by":"crossref","first-page":"1749","DOI":"10.1016\/j.automatica.2004.05.003","volume":"40","author":"X. R. Cao","year":"2004","unstructured":"Cao, X. R., and Guo, X. 2004. A unified approach to Markov decision problems and performance sensitivity analysis with discounted and average criteria: Multichain cases. Automatica 40: 1749?1759.","journal-title":"Automatica"},{"key":"CR14","doi-asserted-by":"crossref","first-page":"482","DOI":"10.1109\/87.701341","volume":"6","author":"X. R. Cao","year":"1998","unstructured":"Cao, X. R., and Wan, Y. W. 1998. Algorithms for sensitivity analysis of Markov systems through potentials and perturbation realization. IEEE Trans. Control Syst. Technol. 6: 482?494.","journal-title":"IEEE Trans. Control Syst. Technol."},{"key":"CR15","doi-asserted-by":"crossref","first-page":"1814","DOI":"10.1109\/9.545747","volume":"41","author":"X. R. Cao","year":"1996","unstructured":"Cao, X. R., Yuan, X. M., and Qiu, L. 1996. A single sample path-based performance sensitivity formula for Markov chains. IEEE Trans. Automat. Contr. 41: 1814?1817.","journal-title":"IEEE Trans. Automat. Contr."},{"key":"CR16","doi-asserted-by":"crossref","first-page":"929","DOI":"10.1016\/S0005-1098(01)00282-5","volume":"38","author":"X. R. Cao","year":"2002","unstructured":"Cao, X. R., Ren, Z. Y., Bhatnagar, S., Fu, M., and Marcus, S. 2002. A time aggregation approach to Markov decision processes. Automatica 38: 929?943.","journal-title":"Automatica"},{"key":"CR17","doi-asserted-by":"crossref","first-page":"1400","DOI":"10.1109\/9.299620","volume":"39","author":"E. K. P. Chong","year":"1994","unstructured":"Chong, E. K. P., and Ramadge, P. J. 1994. Stochastic optimization of regenerative systems using infinitesimal perturbation analysis. IEEE Trans. Automat. Contr. 39: 1400?1410.","journal-title":"IEEE Trans. Automat. Contr."},{"key":"CR18","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1017\/S0269964803172051","volume":"17","author":"W. L. Cooper","year":"2003","unstructured":"Cooper, W. L., Henderson, S. G., and Lewis, M. E. 2003. Convergence of simulation-based policy iteration. Probab. Eng. Inf. Sci. 17: 213?234.","journal-title":"Probab. Eng. Inf. Sci."},{"key":"CR19","unstructured":"Dijk, N. V. 1993. Queueing Networks and Product Forms: A Systems Approach. 
Chichester: John Willey and Sons."},{"key":"CR20","doi-asserted-by":"crossref","first-page":"493","DOI":"10.1109\/TAC.2004.825647","volume":"49","author":"H. T. Fang","year":"2004","unstructured":"Fang, H. T., and Cao, X. R. 2004. Potential-based on-line policy iteration algorithms for Markov decision processes. IEEE Trans. Automat. Contr. 49: 493?505.","journal-title":"IEEE Trans. Automat. Contr."},{"issue":"4","key":"CR21","doi-asserted-by":"crossref","first-page":"559","DOI":"10.1007\/BF00933971","volume":"40","author":"Y. C. Ho","year":"1983","unstructured":"Ho, Y. C., and Cao, X. R. 1983. Perturbation analysis and optimization of queueing networks. J. Optim. Theory Appl. 40(4): 559?582.","journal-title":"J. Optim. Theory Appl."},{"key":"CR22","volume-title":"Perturbation Analysis of Discrete-Event Dynamic Systems","author":"Y. C. Ho","year":"1991","unstructured":"Ho, Y. C., and Cao, X. R. 1991. Perturbation Analysis of Discrete-Event Dynamic Systems. Boston: Kluwer Academic Publisher."},{"key":"CR23","doi-asserted-by":"crossref","first-page":"783","DOI":"10.1109\/TAC.2003.811254","volume":"48","author":"Y. C. Ho","year":"2003","unstructured":"Ho, Y. C., Zhao, Q. C., and Pepyne, D. L. 2003. The no free lunch theorem, complexity and computer security. IEEE Trans. Automat. Contr. 48: 783?793.","journal-title":"IEEE Trans. Automat. Contr."},{"key":"CR24","doi-asserted-by":"crossref","first-page":"191","DOI":"10.1109\/9.905687","volume":"46","author":"P. Marbach","year":"2001","unstructured":"Marbach, P., and Tsitsiklis, T. N. 2001. Simulation-based optimization of Markov reward processes. IEEE Trans. Automat. Contr. 46: 191?209.","journal-title":"IEEE Trans. Automat. Contr."},{"key":"CR25","unstructured":"Meuleau, N., Peshkin, L., Kim, K. -E., and Kaelbling, P. L. 1999. Learning finite-state controllers for partially observable environments. Proceedings of the Fifteenth International Conference on Uncertainty in Artificial Intelligence."},{"key":"CR26","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"M. L. Puterman","year":"1994","unstructured":"Puterman, M. L. 1994. Markov Decision Processes: Discrete Stochastic Dynamic Programming. New York: Wiley."},{"key":"CR27","doi-asserted-by":"crossref","first-page":"35","DOI":"10.1080\/07408178908966205","volume":"21","author":"R. Suri","year":"1989","unstructured":"Suri, R., and Leung, Y. T. 1989. Single run optimization of discrete event simulations?An empirical study using the M\/M\/1 queue. IIE Trans. 21: 35?49.","journal-title":"IIE Trans."},{"key":"CR28","unstructured":"Theocharous, G., and Kaelbling, P. L. 2004. Approximate planning in POMDPS with macro-actions. Advances in Neural Information Processing Systems 16 (NIPS-03). Cambridge, MA: MIT Press. 775-782."},{"key":"CR29","first-page":"279","volume":"8","author":"C. Watkins","year":"1992","unstructured":"Watkins, C., and Dayan, P. 1992. Q-learning. Mach. Learn. 8: 279?292.","journal-title":"Mach. 
Learn."}],"container-title":["Discrete Event Dynamic Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10626-004-6211-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10626-004-6211-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10626-004-6211-4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,30]],"date-time":"2019-05-30T19:58:49Z","timestamp":1559246329000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10626-004-6211-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2005,6]]},"references-count":29,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2005,6]]}},"alternative-id":["6211"],"URL":"https:\/\/doi.org\/10.1007\/s10626-004-6211-4","relation":{},"ISSN":["0924-6703","1573-7594"],"issn-type":[{"value":"0924-6703","type":"print"},{"value":"1573-7594","type":"electronic"}],"subject":[],"published":{"date-parts":[[2005,6]]}}}