{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T17:32:48Z","timestamp":1729618368455,"version":"3.28.0"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1109\/icme46284.2020.9102924","type":"proceedings-article","created":{"date-parts":[[2020,6,9]],"date-time":"2020-06-09T21:40:07Z","timestamp":1591738807000},"page":"1-6","source":"Crossref","is-referenced-by-count":2,"title":["State Representation Learning For Effective Deep Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Jian","family":"Zhao","sequence":"first","affiliation":[]},{"given":"Wengang","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Tianyu","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Yun","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Houqiang","family":"Li","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"journal-title":"Github Repository","article-title":"Openai 
baselines","year":"2017","author":"dhariwal","key":"ref30"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2019.00058"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2019.00030"},{"key":"ref12","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11796","article-title":"Rainbow: Combining improvements in deep reinforcement learning","author":"hessel","year":"2018","journal-title":"AAAI Conference on Artificial Intelligence (AAAI)"},{"journal-title":"International Conference on Machine Learning (ICML)","article-title":"Trust region policy optimization","year":"2015","author":"schulman","key":"ref13"},{"key":"ref14","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v34i04.6049","article-title":"Attentive experience replay","author":"sun","year":"2020","journal-title":"AAAI Conference on Artificial Intelligence (AAAI)"},{"journal-title":"Advances in Neural Information Processing Systems","article-title":"Learning from demonstration","year":"1997","author":"schaal","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_1"},{"journal-title":"AAAI Conference on Artificial Intelligence (AAAI)","article-title":"Deep reinforcement learning with double q-learning","year":"2016","author":"hasselt","key":"ref17"},{"journal-title":"International Conference on Machine Learning (ICML)","article-title":"Asynchronous methods for deep reinforcement learning","year":"2016","author":"mnih","key":"ref18"},{"journal-title":"arXiv 1707 06347","article-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref19"},{"journal-title":"arXiv 1606 01540 [cs]","article-title":"Openai gym","year":"2016","author":"brockman","key":"ref28"},{"journal-title":"Advances in Neural Information Processing Systems","article-title":"Policy gradient methods for reinforcement learning with function approximation","year":"2000","author":"sutton","key":"ref4"},{"journal-title":"arXiv preprint 
arXiv 1906 03008","article-title":"Unsupervised state representation learning in atari","year":"2019","author":"anand","key":"ref27"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2002.1014237"},{"journal-title":"Advances in Neural Information Processing Systems","article-title":"Analysis of temporal-difference learning with function approximation","year":"1997","author":"tsitsiklis","key":"ref6"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","article-title":"Q-learning","volume":"8","author":"watkins","year":"1992","journal-title":"Machine Learning"},{"journal-title":"Advances in Neural Information Processing Systems","article-title":"Imagenet classification with deep convolutional neural networks","year":"2012","author":"krizhevsky","key":"ref8"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TCIAIG.2013.2294713"},{"journal-title":"arXiv 1509 02971","article-title":"Continuous control with deep reinforcement learning","year":"2015","author":"lillicrap","key":"ref2"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2019.00109"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref20","first-page":"1109","article-title":"Large scale online learning of image similarity through ranking","volume":"11","author":"chechik","year":"2010","journal-title":"Journal of Machine Learning Research (JMLR)"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.320"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2019.00193"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-015-9459-7"},{"journal-title":"International Conference on Autonomous Agents and Multiagent Systems (AAMAS)","article-title":"Playing atari with six 
neurons","year":"2019","author":"cuccu","key":"ref23"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2016.7759578"},{"key":"ref25","first-page":"2746","article-title":"Embed to control: A locally linear latent dynamics model for control from raw images","author":"watter","year":"2015","journal-title":"Advances in Neural Information Processing Systems"}],"event":{"name":"2020 IEEE International Conference on Multimedia and Expo (ICME)","start":{"date-parts":[[2020,7,6]]},"location":"London, UK","end":{"date-parts":[[2020,7,10]]}},"container-title":["2020 IEEE International Conference on Multimedia and Expo (ICME)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9099125\/9102711\/09102924.pdf?arnumber=9102924","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,27]],"date-time":"2022-10-27T12:56:50Z","timestamp":1666875410000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9102924\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/icme46284.2020.9102924","relation":{},"subject":[],"published":{"date-parts":[[2020,7]]}}}