{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:02:09Z","timestamp":1740099729693,"version":"3.37.3"},"reference-count":29,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,8,1]],"date-time":"2020-08-01T00:00:00Z","timestamp":1596240000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,8,1]],"date-time":"2020-08-01T00:00:00Z","timestamp":1596240000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,8,1]],"date-time":"2020-08-01T00:00:00Z","timestamp":1596240000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100003725","name":"Basic Science Research Program through the National Research Foundation of Korea (NRF","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012491","name":"Ministry of Education","doi-asserted-by":"publisher","award":["NRF-2018R1D1A1B07049267"],"id":[{"id":"10.13039\/501100012491","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,8]]},"DOI":"10.1109\/case48305.2020.9249227","type":"proceedings-article","created":{"date-parts":[[2020,11,13]],"date-time":"2020-11-13T22:19:58Z","timestamp":1605305998000},"page":"921-927","source":"Crossref","is-referenced-by-count":2,"title":["Reinforcement Learning with Converging Goal Space and Binary Reward Function"],"prefix":"10.1109","author":[{"given":"Wooseok","family":"Ro","sequence":"first","affiliation":[]},{"given":"Wonseok","family":"Jeon","sequence":"additional","affiliation":[]},{"given":"Hamid","family":"Bamshad","sequence":"additional","affiliation":[]},{"given":"Hyunseok","family":"Yang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"journal-title":"OpenAI Gym","year":"2016","author":"brockman","key":"ref10"},{"journal-title":"OpenAI Baselines","year":"2017","author":"dhariwal","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"journal-title":"The behavior of organisms An experimental analysis BF Skinner Foundation","year":"1990","author":"skinner","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/5988.001.0001"},{"key":"ref15","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume":"99","author":"ng","year":"1999","journal-title":"ICML"},{"key":"ref16","first-page":"463","article-title":"Learning to drive a bicycle using reinforcement learning and shaping","author":"randl?v","year":"0","journal-title":"Proc of the Fifteenth International Conf on Machine Learning Ser ICML"},{"key":"ref17","first-page":"670","article-title":"Training and tracking in robotics","author":"selfridge","year":"0","journal-title":"Proceedings of the 9th International Joint Conference on Artificial Intelligence - Volume 1 ser IJCAI’ 85"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143906"},{"key":"ref19","first-page":"840","article-title":"Reward function and initial values: better choices for accelerated goal-directed reinforcement learning","author":"matignon","year":"0","journal-title":"International Conference on Artificial Neural Networks"},{"journal-title":"Continuous control with deep reinforcement learning","year":"2015","author":"lillicrap","key":"ref28"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref27","first-page":"1312","article-title":"Universal value function approximators","author":"schaul","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1177\/0278364907084980"},{"key":"ref6","first-page":"440","article-title":"The influence of reward on the speed of reinforcement learning: An analysis of shaping","author":"laud","year":"0","journal-title":"Proceedings of the 20th International Conference on Machine Learning (ICML-03)"},{"journal-title":"Adam A method for stochastic optimization","year":"2014","author":"kingma","key":"ref29"},{"key":"ref5","first-page":"1","article-title":"Reinforcement learning for humanoid robotics","author":"peters","year":"0","journal-title":"Proceedings of the Third IEEE-RAS International Conference on Humanoid Robots"},{"key":"ref8","volume":"2","author":"sutton","year":"1998","journal-title":"Introduction to Reinforcement Learning"},{"key":"ref7","article-title":"Hindsight experience replay","volume":"abs 1707 1495","author":"andrychowicz","year":"2017","journal-title":"CoRR"},{"journal-title":"Playing atari with deep reinforcement learning","year":"2013","author":"mnih","key":"ref2"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"silver","year":"2016","journal-title":"Nature"},{"journal-title":"Reverse curriculum generation for reinforcement learning","year":"2017","author":"florensa","key":"ref20"},{"journal-title":"Archer Aggressive rewards to counter bias in hindsight experience replay","year":"2018","author":"lanka","key":"ref22"},{"journal-title":"Multi-goal reinforcement learning Challenging robotics environments and request for research","year":"2018","author":"plappert","key":"ref21"},{"journal-title":"Learning complex dexterous manipulation with deep reinforcement learning and demonstrations","year":"2017","author":"rajeswaran","key":"ref24"},{"journal-title":"Balance between efficient and effective learning Dense2sparse reward shaping for robot manipulation with environment uncertainty","year":"2020","author":"luo","key":"ref23"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref25","article-title":"Sim-to-real transfer of robotic control with dynamics randomization","author":"peng","year":"2017","journal-title":"CoRR"}],"event":{"name":"2020 IEEE 16th International Conference on Automation Science and Engineering (CASE)","start":{"date-parts":[[2020,8,20]]},"location":"Hong Kong, Hong Kong","end":{"date-parts":[[2020,8,21]]}},"container-title":["2020 IEEE 16th International Conference on Automation Science and Engineering (CASE)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9210430\/9216730\/09249227.pdf?arnumber=9249227","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,27]],"date-time":"2022-06-27T15:46:35Z","timestamp":1656344795000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9249227\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,8]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/case48305.2020.9249227","relation":{},"subject":[],"published":{"date-parts":[[2020,8]]}}}