{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T09:21:45Z","timestamp":1730280105763,"version":"3.28.0"},"reference-count":18,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,11]]},"DOI":"10.1109\/la-cci.2018.8625243","type":"proceedings-article","created":{"date-parts":[[2019,1,25]],"date-time":"2019-01-25T02:41:37Z","timestamp":1548384097000},"page":"1-6","source":"Crossref","is-referenced-by-count":21,"title":["Action Selection Methods in a Robotic Reinforcement Learning Scenario"],"prefix":"10.1109","author":[{"given":"Francisco","family":"Cruz","sequence":"first","affiliation":[]},{"given":"Peter","family":"Wuppen","sequence":"additional","affiliation":[]},{"given":"Alvin","family":"Fazrie","sequence":"additional","affiliation":[]},{"given":"Cornelius","family":"Weber","sequence":"additional","affiliation":[]},{"given":"Stefan","family":"Wermter","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-24942-6"},{"journal-title":"Machine Learning An Algorithmic Perspective","year":"2015","author":"marsland","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2007.368199"},{"key":"ref13","article-title":"On-line Q-learning using connectionist systems","author":"rummery","year":"1994","journal-title":"technical report CUED\/F-Infeng\/TR"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref15","article-title":"Efficient exploration in reinforcement learning","author":"thrun","year":"1992","journal-title":"Technical Report EER\/865072"},{"key":"ref16","first-page":"397","article-title":"Using confidence bounds for exploitation-exploration trade-offs","volume":"3","author":"auer","year":"2003","journal-title":"Journal of Machine Learning Research"},{"key":"ref17","article-title":"Explorations in Efficient Reinforcement Learning","author":"wiering","year":"1999","journal-title":"Ph D Dissertation"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2016.2543839"},{"key":"ref4","article-title":"Learning from Delayed Rewards","author":"watkins","year":"1989","journal-title":"Ph D Dissertation"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1098\/rstb.2007.2098"},{"key":"ref6","first-page":"203","article-title":"?-greedy exploration in reinforcement learning based on value differences","author":"tokic","year":"0","journal-title":"Proceedings of the Conference on Artificial Intelligence"},{"key":"ref5","first-page":"211","article-title":"Training stochastic model recognition algorithms as networks can lead to maximum mutual information estimates of parameters","author":"bridle","year":"1990","journal-title":"Proceedings of the 1989 Conference on Advances in Neural Information Processing Systems NIPS"},{"key":"ref8","article-title":"Algorithms for Reinforcement Learning. Synthesis Lectures on Artificial Intelligence and Machine Learning","author":"szepesv?ri","year":"2010","journal-title":"Morgan & Claypool"},{"key":"ref7","first-page":"335","article-title":"Value-difference based exploration: Adaptive control between ?-greedy and softmax","author":"tokic","year":"2011","journal-title":"Proceedings of the 34th Annual German Conference on Advances in Artificial Intelligence"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jmp.2008.12.005"},{"key":"ref9","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","author":"puterman","year":"1994","journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming"}],"event":{"name":"2018 IEEE Latin American Conference on Computational Intelligence (LA-CCI)","start":{"date-parts":[[2018,11,7]]},"location":"Gudalajara, Mexico","end":{"date-parts":[[2018,11,9]]}},"container-title":["2018 IEEE Latin American Conference on Computational Intelligence (LA-CCI)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8609898\/8625198\/08625243.pdf?arnumber=8625243","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,26]],"date-time":"2022-01-26T23:58:21Z","timestamp":1643241501000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8625243\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,11]]},"references-count":18,"URL":"https:\/\/doi.org\/10.1109\/la-cci.2018.8625243","relation":{},"subject":[],"published":{"date-parts":[[2018,11]]}}}