{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T04:49:32Z","timestamp":1725684572946},"reference-count":29,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,9,1]],"date-time":"2020-09-01T00:00:00Z","timestamp":1598918400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,9,1]],"date-time":"2020-09-01T00:00:00Z","timestamp":1598918400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,9,1]],"date-time":"2020-09-01T00:00:00Z","timestamp":1598918400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100004543","name":"China Scholarship Council (CSC), CSC","doi-asserted-by":"publisher","award":["201706990015"],"id":[{"id":"10.13039\/501100004543","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,9]]},"DOI":"10.1109\/synasc51798.2020.00033","type":"proceedings-article","created":{"date-parts":[[2021,2,24]],"date-time":"2021-02-24T21:09:52Z","timestamp":1614200992000},"page":"149-152","source":"Crossref","is-referenced-by-count":6,"title":["Tackling Morpion Solitaire with AlphaZero-like Ranked Reward Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Hui","family":"Wang","sequence":"first","affiliation":[]},{"given":"Mike","family":"Preuss","sequence":"additional","affiliation":[]},{"given":"Michael","family":"Emmerich","sequence":"additional","affiliation":[]},{"given":"Aske","family":"Plaat","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TCIAIG.2012.2186810"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2010.09.010"},{"journal-title":"A Multi-task Selected Learning Approach for Solving New Type 3D Bin Packing Problem","year":"2018","author":"hu","key":"ref13"},{"journal-title":"Ranked reward Enabling self-play reinforcement learning for combinatorial optimization","year":"2018","author":"laterre","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"journal-title":"Elf opengo An analysis and open reimplementation of alphazero","year":"2019","author":"tian","key":"ref16"},{"journal-title":"Analysis of hyperparameters for small games Iterations or epochs in self-play?","year":"2020","author":"wang","key":"ref17"},{"journal-title":"Monte Carlo Q-learning for General Game Playing","year":"2018","author":"wang","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-31978-6_11"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/SSCI44817.2019.9002814"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"journal-title":"Morpion solitaire 5d a new upper bound of 121 on the maximum score","year":"2013","author":"kawamura","key":"ref27"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58115-2_37"},{"journal-title":"Hyper-parameter sweep on alphazero general","year":"2019","author":"wang","key":"ref29"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1038\/nature25978"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/s00224-005-1240-4"},{"journal-title":"Morpion Solitaire","year":"2020","author":"boyer","key":"ref7"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-59238-7"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2014.09.003"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"journal-title":"A0c Alpha zero in continuous action space","year":"2018","author":"moerland","key":"ref20"},{"journal-title":"Neural Combinatorial Optimization with Reinforcement Learning","year":"2016","author":"bello","key":"ref22"},{"key":"ref21","first-page":"2692","article-title":"Pointer networks","author":"vinyals","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref24","article-title":"Nested monte-carlo search","author":"cazenave","year":"0","journal-title":"Twenty-First International Joint Conference on Artificial Intelligence"},{"journal-title":"Solving hard ai planning instances using curriculum-driven deep reinforcement learning","year":"2020","author":"feng","key":"ref23"},{"journal-title":"Beam nested rollout policy adaptation","year":"2012","author":"cazenave","key":"ref26"},{"key":"ref25","article-title":"Nested rollout policy adaptation for monte carlo tree search","author":"rosin","year":"0","journal-title":"Twenty-Second International Joint Conference on Artificial Intelligence"}],"event":{"name":"2020 22nd International Symposium on Symbolic and Numeric Algorithms for Scientific Computing (SYNASC)","start":{"date-parts":[[2020,9,1]]},"location":"Timisoara, Romania","end":{"date-parts":[[2020,9,4]]}},"container-title":["2020 22nd International Symposium on Symbolic and Numeric Algorithms for Scientific Computing (SYNASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9356934\/9356935\/09356942.pdf?arnumber=9356942","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,30]],"date-time":"2022-06-30T15:16:39Z","timestamp":1656602199000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9356942\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/synasc51798.2020.00033","relation":{},"subject":[],"published":{"date-parts":[[2020,9]]}}}