{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T18:24:10Z","timestamp":1729621450425,"version":"3.28.0"},"reference-count":45,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,8,1]],"date-time":"2020-08-01T00:00:00Z","timestamp":1596240000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,8,1]],"date-time":"2020-08-01T00:00:00Z","timestamp":1596240000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,8,1]],"date-time":"2020-08-01T00:00:00Z","timestamp":1596240000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,8]]},"DOI":"10.1109\/case48305.2020.9217023","type":"proceedings-article","created":{"date-parts":[[2020,10,8]],"date-time":"2020-10-08T20:01:51Z","timestamp":1602187311000},"page":"1514-1519","source":"Crossref","is-referenced-by-count":7,"title":["A 3D Simulation Environment and Navigation Approach for Robot Navigation via Deep Reinforcement Learning in Dense Pedestrian Environment"],"prefix":"10.1109","author":[{"given":"Qi","family":"Liu","sequence":"first","affiliation":[]},{"given":"Yanjie","family":"Li","sequence":"additional","affiliation":[]},{"given":"Lintao","family":"Liu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"journal-title":"arXiv preprint arXiv 1707 06347","article-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref39"},{"key":"ref38","first-page":"5048","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref33","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1991.3.1.88"},{"key":"ref31","first-page":"4565","article-title":"Generative adversarial imitation learning","author":"ho","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460968"},{"key":"ref37","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v30i1.10295","article-title":"Deep reinforcement learning with double q-learning","author":"van hasselt","year":"2016","journal-title":"THIRTIETH AAAI Conference on Artificial Intelligence"},{"journal-title":"arXiv preprint arXiv 1509 02971","article-title":"Continuous control with deep reinforcement learning","year":"2015","author":"lillicrap","key":"ref36"},{"key":"ref35","first-page":"5279","article-title":"Scalable trust-region method for deep reinforcement learning using kronecker-factored approximation","author":"wu","year":"2017","journal-title":"Advances in neural information processing systems"},{"journal-title":"arXiv preprint arXiv 1611 01224","article-title":"Sample efficient actor-critic with experience replay","year":"2016","author":"wang","key":"ref34"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TSSC.1968.300136"},{"journal-title":"arXiv preprint arXiv 1802 
09085","article-title":"Addressing function approximation error in actor-critic methods","year":"2018","author":"fujimoto","key":"ref40"},{"journal-title":"arXiv preprint arXiv 1801 01000","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","year":"2018","author":"haarnoja","key":"ref11"},{"journal-title":"arXiv preprint arXiv 1812 02588","article-title":"Soft actor-critic algorithms and applications","year":"2018","author":"haarnoja","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/BF01386390"},{"journal-title":"Rapidly-Exploring Random Trees A New Tool for Path Planning","year":"1998","author":"lavalle","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/70.508439"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CEC.2000.870304"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1163\/1568553042674662"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1109\/100.580977","article-title":"The dynamic window approach to collision avoidance","volume":"4","author":"fox","year":"1997","journal-title":"IEEE Robotics & Automation Magazine"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1177\/027836499801700706"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202134"},{"key":"ref4","first-page":"183","article-title":"Madem: a multi-modal decision making for social mas","author":"grimaldo","year":"2008","journal-title":"Proceedings of the 7th International Joint Conference on Autonomous Agents and Multiagent Systems"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"journal-title":"Fundamentals of Artificial Neural Networks","year":"1995","author":"hassoun","key":"ref3"},{"key":"ref6","first-page":"5","article-title":"Ros: an open-source robot operating system","volume":"3","author":"quigley","year":"2009","journal-title":"ICRA Workshop on Open Source Software"},{"journal-title":"arXiv preprint arXiv 1804 00209","article-title":"Curiosity-driven exploration for mapless navigation with deep reinforcement learning","year":"2018","author":"zhelo","key":"ref29"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2004.1389727"},{"journal-title":"pedsim ros","year":"2018","author":"billy okal","key":"ref8"},{"journal-title":"Stable Baselines","year":"2018","author":"hill","key":"ref7"},{"volume":"135","journal-title":"Introduction to Reinforcement Learning","year":"1998","author":"sutton","key":"ref2"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.51.4282"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2008.10.024"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2008.4543489"},{"key":"ref45","first-page":"1352","article-title":"Reinforcement learning with deep energy-based policies","author":"haarnoja","year":"2017","journal-title":"Proceedings of the 34th International Conference on Machine Learning-Volume 70"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593871"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1007\/978-3-642-19457-3_1","article-title":"Reciprocal n-body collision avoidance","author":"van den berg","year":"2011","journal-title":"Robotics 
Research"},{"key":"ref42","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Advances in neural information processing systems"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794134"},{"key":"ref41","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"journal-title":"Modeling Purposeful Adaptive Behavior with the Principle of Maximum Causal Entropy","year":"2010","author":"ziebart","key":"ref44"},{"journal-title":"arXiv preprint arXiv 1312 5602","article-title":"Playing atari with deep reinforcement learning","year":"2013","author":"mnih","key":"ref26"},{"key":"ref43","first-page":"1433","article-title":"Maximum entropy inverse reinforcement learning","volume":"8","author":"ziebart","year":"2008","journal-title":"AAAI"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/RCAR.2016.7784001"}],"event":{"name":"2020 IEEE 16th International Conference on Automation Science and Engineering (CASE)","start":{"date-parts":[[2020,8,20]]},"location":"Hong Kong, Hong Kong","end":{"date-parts":[[2020,8,21]]}},"container-title":["2020 IEEE 16th International Conference on Automation Science and Engineering (CASE)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9210430\/9216730\/09217023.pdf?arnumber=9217023","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,22]],"date-time":"2022-11-22T17:59:43Z","timestamp":1669139983000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9217023\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,8]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1109\/case48305.2020.9217023","relation":{},"subject":[],"published":{"date-parts":[[2020,8]]}}}