{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T04:53:36Z","timestamp":1725684816486},"publisher-location":"Berlin, Heidelberg","reference-count":17,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642299452"},{"type":"electronic","value":"9783642299469"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-29946-9_31","type":"book-chapter","created":{"date-parts":[[2012,5,18]],"date-time":"2012-05-18T17:01:49Z","timestamp":1337360509000},"page":"321-332","source":"Crossref","is-referenced-by-count":6,"title":["Compound Reinforcement Learning: Theory and an Application to Finance"],"prefix":"10.1007","author":[{"given":"Tohgoroh","family":"Matsui","sequence":"first","affiliation":[]},{"given":"Takashi","family":"Goto","sequence":"additional","affiliation":[]},{"given":"Kiyoshi","family":"Izumi","sequence":"additional","affiliation":[]},{"given":"Yu","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"issue":"4","key":"31_CR1","doi-asserted-by":"publisher","first-page":"880","DOI":"10.1287\/moor.1080.0324","volume":"33","author":"A. Basu","year":"2008","unstructured":"Basu, A., Bhattacharyya, T., Borkar, V.S.: A learning algorithm for risk-sensitive cost. Mathematics of Operations Research\u00a033(4), 880\u2013898 (2008)","journal-title":"Mathematics of Operations Research"},{"issue":"2","key":"31_CR2","doi-asserted-by":"publisher","first-page":"294","DOI":"10.1287\/moor.27.2.294.324","volume":"27","author":"V.S. Borkar","year":"2002","unstructured":"Borkar, V.S.: Q-learning for risk-sensitive control. Mathematics of Operations Research\u00a027(2), 294\u2013311 (2002)","journal-title":"Mathematics of Operations Research"},{"key":"31_CR3","doi-asserted-by":"crossref","unstructured":"Campbell, J.Y., Lo, A.W., Graig MacKinlay, A.: The Econometrics of Financial Markets. Princeton University Press (1997)","DOI":"10.1515\/9781400830213"},{"key":"31_CR4","unstructured":"CMA. Global sovereign credit risk report, 4th quarter 2010. Credit Market Analysis, Ltd. (CMA) (2011)"},{"key":"31_CR5","doi-asserted-by":"crossref","first-page":"81","DOI":"10.1613\/jair.1666","volume":"24","author":"P. Geibel","year":"2005","unstructured":"Geibel, P., Wysotzki, F.: Risk-sensitive reinforcement learning applied to control under constraints. Journal of Artificial Intelligence Research\u00a024, 81\u2013108 (2005)","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"1","key":"31_CR6","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/B:MACH.0000019802.64038.6c","volume":"55","author":"A. Gosavi","year":"2004","unstructured":"Gosavi, A.: A reinforcement learning algorithm based on policy iteration for average reward: Empirical results with yield management and convergence analysis. Machine Learning\u00a055(1), 5\u201329 (2004)","journal-title":"Machine Learning"},{"key":"31_CR7","doi-asserted-by":"crossref","unstructured":"Heger, M.: Consideration of risk in reinforcement learning. In: Proc. of the Eleventh International Conference on Machine Learning, ICML 1994, pp. 105\u2013111 (1994)","DOI":"10.1016\/B978-1-55860-335-6.50021-0"},{"key":"31_CR8","doi-asserted-by":"crossref","first-page":"917","DOI":"10.1002\/j.1538-7305.1956.tb03809.x","volume":"35","author":"J.L. Kelly Jr.","year":"1956","unstructured":"Kelly Jr., J.L.: A new interpretation of information rate. Bell System Technical Journal\u00a035, 917\u2013926 (1956)","journal-title":"Bell System Technical Journal"},{"issue":"2-3","key":"31_CR9","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1023\/A:1017940631555","volume":"49","author":"O. Mihatsch","year":"2002","unstructured":"Mihatsch, O., Neuneier, R.: Risk-sensitive reinforcement learning. Machine Learning\u00a049(2-3), 267\u2013290 (2002)","journal-title":"Machine Learning"},{"key":"31_CR10","unstructured":"Poundstone, W.: Fortune\u2019s Formula: The untold story of the scientific betting system that beat the casinos and wall street. Hill and Wang (2005)"},{"key":"31_CR11","unstructured":"Sato, M., Kobayashi, S.: Average-reward reinforcement learning for variance penalized Markov decision problems. In: Proc. of the Eighteenth International Conference on Machine Learning, ICML 2001, pp. 473\u2013480 (2001)"},{"key":"31_CR12","doi-asserted-by":"crossref","unstructured":"Schwartz, A.: A reinforcement learning method for maximizing undiscounted rewards. In: Proc. of the Tenth International Conference on Machine Learning (ICML 1993), pp. 298\u2013305 (1993)","DOI":"10.1016\/B978-1-55860-307-3.50045-9"},{"key":"31_CR13","unstructured":"Singh, S.P.: Reinforcement learning algorithms for average-payoff Markovian decision processes. In: Proc. of the Twelfth National Conference on Artificial Intelligence (AAAI 1994), vol.\u00a01, pp. 700\u2013705 (1994)"},{"key":"31_CR14","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. The MIT Press (1998)","DOI":"10.1109\/TNN.1998.712192"},{"key":"31_CR15","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1023\/A:1017980312899","volume":"49","author":"J.N. Tsitsiklis","year":"2002","unstructured":"Tsitsiklis, J.N., Van Roy, B.: On average versus discounted reward temporal-difference learning. Machine Learning\u00a049, 179\u2013191 (2002)","journal-title":"Machine Learning"},{"key":"31_CR16","unstructured":"Vince, R.: Portfolio management formulas: mathematical trading methods for the futures, options, and stock markets. Wiley (1990)"},{"issue":"3\/4","key":"31_CR17","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1023\/A:1022676722315","volume":"8","author":"C.J.C.H. Watkins","year":"1992","unstructured":"Watkins, C.J.C.H., Dayan, P.: Technical note: Q-learning. Machine Learning\u00a08(3\/4), 279\u2013292 (1992)","journal-title":"Machine Learning"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-29946-9_31","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,7,7]],"date-time":"2020-07-07T21:28:29Z","timestamp":1594157309000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-29946-9_31"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642299452","9783642299469"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-29946-9_31","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}