{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,11,19]],"date-time":"2024-11-19T19:01:58Z","timestamp":1732042918830,"version":"3.28.0"},"publisher-location":"New York, NY, USA","reference-count":63,"publisher":"ACM","funder":[{"DOI":"10.13039\/100017052","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61972372, U19A2079, 62121002"],"id":[{"id":"10.13039\/100017052","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2021YFF0901603"],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,7,19]]},"DOI":"10.1145\/3539618.3591636","type":"proceedings-article","created":{"date-parts":[[2023,7,19]],"date-time":"2023-07-19T00:22:23Z","timestamp":1689726143000},"page":"238-248","update-policy":"http:\/\/dx.doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":16,"title":["Alleviating Matthew Effect of Offline Reinforcement Learning in Interactive Recommendation"],"prefix":"10.1145",
"author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-5187-9196","authenticated-orcid":false,"given":"Chongming","family":"Gao","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"ORCID":"http:\/\/orcid.org\/0009-0001-4868-0952","authenticated-orcid":false,"given":"Kexin","family":"Huang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-4752-2629","authenticated-orcid":false,"given":"Jiawei","family":"Chen","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-7849-208X","authenticated-orcid":false,"given":"Yuan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology Co., Ltd., Beijing, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0001-5667-5347","authenticated-orcid":false,"given":"Biao","family":"Li","sequence":"additional","affiliation":[{"name":"Kuaishou Technology Co., Ltd., Beijing, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-9266-0780","authenticated-orcid":false,"given":"Peng","family":"Jiang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology Co., Ltd., Beijing, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-5369-884X","authenticated-orcid":false,"given":"Shiqi","family":"Wang","sequence":"additional","affiliation":[{"name":"Chongqing University, Chongqing, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0003-1349-9755","authenticated-orcid":false,"given":"Zhong","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0001-8472-7992","authenticated-orcid":false,"given":"Xiangnan","family":"He","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]}],"member":"320","published-online":{"date-parts":[[2023,7,18]]},
"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543846"},{"key":"e_1_3_2_1_2_1","volume-title":"International Conference on Machine Learning (ICML '20)","author":"Agarwal Rishabh","year":"2020","unstructured":"Rishabh Agarwal, Dale Schuurmans, and Mohammad Norouzi. 2020. An optimistic perspective on offline reinforcement learning. In International Conference on Machine Learning (ICML '20). PMLR, 104--114."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380281"},{"key":"e_1_3_2_1_4_1","volume-title":"2023a. Reinforcing User Retention in a Billion Scale Short Video Recommender System. arXiv preprint arXiv:2302.01724","author":"Cai Qingpeng","year":"2023","unstructured":"Qingpeng Cai, Shuchang Liu, Xueliang Wang, Tianyou Zuo, Wentao Xie, Bin Yang, Dong Zheng, Peng Jiang, and Kun Gai. 2023a. Reinforcing User Retention in a Billion Scale Short Video Recommender System. arXiv preprint arXiv:2302.01724 (2023)."},{"key":"e_1_3_2_1_5_1","volume-title":"2023b. Two-Stage Constrained Actor-Critic for Short Video Recommendation. arXiv preprint arXiv:2302.01680","author":"Cai Qingpeng","year":"2023","unstructured":"Qingpeng Cai, Zhenghai Xue, Chi Zhang, Wanqi Xue, Shuchang Liu, Ruohan Zhan, Xueliang Wang, Tianyou Zuo, Wentao Xie, Dong Zheng, et al. 2023b. Two-Stage Constrained Actor-Critic for Short Video Recommendation. arXiv preprint arXiv:2302.01680 (2023)."},{"volume-title":"Proceedings of the Twelfth ACM International Conference on Web Search and Data Mining (Melbourne VIC, Australia) (WSDM '19)","author":"Chen Minmin","key":"e_1_3_2_1_6_1","unstructured":"Minmin Chen, Alex Beutel, Paul Covington, Sagar Jain, Francois Belletti, and Ed H. Chi. 2019. Top-K Off-Policy Correction for a REINFORCE Recommender System. In Proceedings of the Twelfth ACM International Conference on Web Search and Data Mining (Melbourne VIC, Australia) (WSDM '19). 456--464."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3523227.3546758"},
{"key":"e_1_3_2_1_8_1","volume-title":"Offline Evaluation for Reinforcement Learning-based Recommendation: A Critical Issue and Some Alternatives. arXiv preprint arXiv:2301.00993","author":"Deffayet Romain","year":"2023","unstructured":"Romain Deffayet, Thibaut Thonet, Jean-Michel Renders, and Maarten de Rijke. 2023. Offline Evaluation for Reinforcement Learning-based Recommendation: A Critical Issue and Some Alternatives. arXiv preprint arXiv:2301.00993 (2023)."},{"key":"e_1_3_2_1_9_1","volume-title":"Visual foresight: Model-based deep reinforcement learning for vision-based robotic control. arXiv preprint arXiv:1812.00568","author":"Ebert Frederik","year":"2018","unstructured":"Frederik Ebert, Chelsea Finn, Sudeep Dasari, Annie Xie, Alex Lee, and Sergey Levine. 2018. Visual foresight: Model-based deep reinforcement learning for vision-based robotic control. arXiv preprint arXiv:1812.00568 (2018)."},{"key":"e_1_3_2_1_10_1","volume-title":"Benchmarking batch deep reinforcement learning algorithms. arXiv preprint arXiv:1910.01708","author":"Fujimoto Scott","year":"2019","unstructured":"Scott Fujimoto, Edoardo Conti, Mohammad Ghavamzadeh, and Joelle Pineau. 2019a. Benchmarking batch deep reinforcement learning algorithms. arXiv preprint arXiv:1910.01708 (2019)."},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the 36th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"2062","author":"Fujimoto Scott","year":"2019","unstructured":"Scott Fujimoto, David Meger, and Doina Precup. 2019b. Off-Policy Deep Reinforcement Learning without Exploration. In Proceedings of the 36th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 97), Kamalika Chaudhuri and Ruslan Salakhutdinov (Eds.). 2052--2062."},
{"key":"e_1_3_2_1_12_1","volume-title":"CIRS: Bursting Filter Bubbles by Counterfactual Interactive Recommender System. arXiv preprint arXiv:2204.01266","author":"Gao Chongming","year":"2022","unstructured":"Chongming Gao, Wenqiang Lei, Jiawei Chen, Shiqi Wang, Xiangnan He, Shijun Li, Biao Li, Yuan Zhang, and Peng Jiang. 2022a. CIRS: Bursting Filter Bubbles by Counterfactual Interactive Recommender System. arXiv preprint arXiv:2204.01266 (2022)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aiopen.2021.06.002"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557220"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557624"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531890"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3498487"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159687"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.5555\/3172077.3172127"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441775"},
{"key":"e_1_3_2_1_21_1","volume-title":"Generative Adversarial Imitation Learning. In Advances in Neural Information Processing Systems (NeurIPS '16","author":"Ho Jonathan","year":"2016","unstructured":"Jonathan Ho and Stefano Ermon. 2016. Generative Adversarial Imitation Learning. In Advances in Neural Information Processing Systems (NeurIPS '16, Vol. 29), D. Lee, M. Sugiyama, U. Luxburg, I. Guyon, and R. Garnett (Eds.)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531716"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3412252"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474247"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2018.00035"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295309"},{"key":"e_1_3_2_1_27_1","volume-title":"MOReL: Model-Based Offline Reinforcement Learning. In Advances in Neural Information Processing Systems (NeurIPS '20","volume":"21823","author":"Kidambi Rahul","year":"2020","unstructured":"Rahul Kidambi, Aravind Rajeswaran, Praneeth Netrapalli, and Thorsten Joachims. 2020. MOReL: Model-Based Offline Reinforcement Learning. In Advances in Neural Information Processing Systems (NeurIPS '20, Vol. 33), H. Larochelle, M. Ranzato, R. Hadsell, M.F. Balcan, and H. Lin (Eds.). 21810--21823."},{"key":"e_1_3_2_1_28_1","volume-title":"Actor-critic algorithms. Advances in neural information processing systems","author":"Konda Vijay","year":"1999","unstructured":"Vijay Konda and John Tsitsiklis. 1999. Actor-critic algorithms. Advances in neural information processing systems, Vol. 12 (1999)."},
{"key":"e_1_3_2_1_29_1","volume-title":"Offline Reinforcement Learning with Implicit Q-Learning. In International Conference on Learning Representations (ICLR '22)","author":"Kostrikov Ilya","year":"2022","unstructured":"Ilya Kostrikov, Ashvin Nair, and Sergey Levine. 2022. Offline Reinforcement Learning with Implicit Q-Learning. In International Conference on Learning Representations (ICLR '22)."},{"key":"e_1_3_2_1_30_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Kumar Aviral","year":"2019","unstructured":"Aviral Kumar, Justin Fu, Matthew Soh, George Tucker, and Sergey Levine. 2019. Stabilizing off-policy q-learning via bootstrapping error reduction. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_31_1","volume-title":"Conservative Q-Learning for Offline Reinforcement Learning. In Advances in Neural Information Processing Systems (NeurIPS '20","volume":"1191","author":"Kumar Aviral","year":"2020","unstructured":"Aviral Kumar, Aurick Zhou, George Tucker, and Sergey Levine. 2020. Conservative Q-Learning for Offline Reinforcement Learning. In Advances in Neural Information Processing Systems (NeurIPS '20, Vol. 33), H. Larochelle, M. Ranzato, R. Hadsell, M.F. Balcan, and H. Lin (Eds.). 1179--1191."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3473325"},{"key":"e_1_3_2_1_33_1","volume-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643","author":"Levine Sergey","year":"2020","unstructured":"Sergey Levine, Aviral Kumar, George Tucker, and Justin Fu. 2020. Offline reinforcement learning: Tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643 (2020)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462957"},
{"key":"e_1_3_2_1_35_1","volume-title":"Exploration and Regularization of the Latent Action Space in Recommendation. arXiv preprint arXiv:2302.03431","author":"Liu Shuchang","year":"2023","unstructured":"Shuchang Liu, Qingpeng Cai, Bowen Sun, Yuhao Wang, Ji Jiang, Dong Zheng, Kun Gai, Peng Jiang, Xiangyu Zhao, and Yongfeng Zhang. 2023. Exploration and Regularization of the Latent Action Space in Recommendation. arXiv preprint arXiv:2302.03431 (2023)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAAI54685.2021.00035"},{"key":"e_1_3_2_1_37_1","volume-title":"Algorithmic framework for model-based deep reinforcement learning with theoretical guarantees. arXiv preprint arXiv:1807.03858","author":"Luo Yuping","year":"2018","unstructured":"Yuping Luo, Huazhe Xu, Yuanzhi Li, Yuandong Tian, Trevor Darrell, and Tengyu Ma. 2018. Algorithmic framework for model-based deep reinforcement learning with theoretical guarantees. arXiv preprint arXiv:1807.03858 (2018)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/2911996.2912004"},{"key":"e_1_3_2_1_39_1","volume-title":"International Conference on Machine Learning (ICML '15)","author":"Swaminathan Adith","year":"2015","unstructured":"Adith Swaminathan and Thorsten Joachims. 2015. Counterfactual Risk Minimization: Learning from Logged Bandit Feedback. In International Conference on Machine Learning (ICML '15). PMLR, 814--823."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159656"},{"key":"e_1_3_2_1_41_1","volume-title":"An Audit of Misinformation Filter Bubbles on YouTube: Bubble Bursting and Recent Behavior Changes. In RecSys '21","author":"Tomlein Matus","year":"2021","unstructured":"Matus Tomlein, Branislav Pecher, Jakub Simko, Ivan Srba, Robert Moro, Elena Stefancova, Michal Kompan, Andrea Hrckova, Juraj Podrouzek, and Maria Bielikova. 2021. An Audit of Misinformation Filter Bubbles on YouTube: Bubble Bursting and Recent Behavior Changes. In RecSys '21. 1--11."},
{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCBDA.2018.8386490"},{"key":"e_1_3_2_1_43_1","volume-title":"What are the statistical limits of offline RL with linear function approximation? arXiv preprint arXiv:2010.11895","author":"Wang Ruosong","year":"2020","unstructured":"Ruosong Wang, Dean P Foster, and Sham M Kakade. 2020a. What are the statistical limits of offline RL with linear function approximation? arXiv preprint arXiv:2010.11895 (2020)."},{"key":"e_1_3_2_1_44_1","volume-title":"Who Are the Best Adopters? User Selection Model for Free Trial Item Promotion","author":"Wang Shiqi","year":"2022","unstructured":"Shiqi Wang, Chongming Gao, Min Gao, Junliang Yu, Zongwei Wang, and Hongzhi Yin. 2022a. Who Are the Best Adopters? User Selection Model for Free Trial Item Promotion. IEEE Transactions on Big Data (2022)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539073"},{"key":"e_1_3_2_1_46_1","first-page":"7768","article-title":"Critic Regularized Regression","volume":"33","author":"Wang Ziyu","year":"2020","unstructured":"Ziyu Wang, Alexander Novikov, Konrad Zolna, Josh S Merel, Jost Tobias Springenberg, Scott E Reed, Bobak Shahriari, Noah Siegel, Caglar Gulcehre, Nicolas Heess, et al. 2020b. Critic Regularized Regression. Advances in Neural Information Processing Systems, Vol. 33 (2020), 7768--7778.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_47_1","volume-title":"Machine learning","author":"Watkins Christopher JCH","year":"1992","unstructured":"Christopher JCH Watkins and Peter Dayan. 1992. Q-learning. Machine learning, Vol. 8, 3 (1992), 279--292."},
{"key":"e_1_3_2_1_48_1","volume-title":"Dynamics-Aware Adaptation for Reinforcement Learning Based Cross-Domain Interactive Recommendation (SIGIR '22)","author":"Wu Junda","year":"2022","unstructured":"Junda Wu, Zhihui Xie, Tong Yu, Handong Zhao, Ruiyi Zhang, and Shuai Li. 2022. Dynamics-Aware Adaptation for Reinforcement Learning Based Cross-Domain Interactive Recommendation (SIGIR '22). 290--300."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i5.16579"},{"volume-title":"Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR '20)","author":"Xin Xin","key":"e_1_3_2_1_50_1","unstructured":"Xin Xin, Alexandros Karatzoglou, Ioannis Arapakis, and Joemon M. Jose. 2020. Self-Supervised Reinforcement Learning for Recommender Systems. In Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR '20). 931--940."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531714"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557300"},{"key":"e_1_3_2_1_53_1","volume-title":"ResAct: Reinforcing Long-term Engagement in Sequential Recommendation with Residual Actor (ICLR '23)","author":"Xue Wanqi","year":"2023","unstructured":"Wanqi Xue, Qingpeng Cai, Ruohan Zhan, Dong Zheng, Peng Jiang, and Bo An. 2023. ResAct: Reinforcing Long-term Engagement in Sequential Recommendation with Residual Actor (ICLR '23)."},
{"key":"e_1_3_2_1_54_1","first-page":"28954","article-title":"Combo: Conservative offline model-based policy optimization","volume":"34","author":"Yu Tianhe","year":"2021","unstructured":"Tianhe Yu, Aviral Kumar, Rafael Rafailov, Aravind Rajeswaran, Sergey Levine, and Chelsea Finn. 2021. Combo: Conservative offline model-based policy optimization. Advances in Neural Information Processing Systems, Vol. 34 (2021), 28954--28967.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_55_1","volume-title":"MOPO: Model-based Offline Policy Optimization. In Advances in Neural Information Processing Systems (NeurIPS '20","volume":"14142","author":"Yu Tianhe","year":"2020","unstructured":"Tianhe Yu, Garrett Thomas, Lantao Yu, Stefano Ermon, James Y Zou, Sergey Levine, Chelsea Finn, and Tengyu Ma. 2020. MOPO: Model-based Offline Policy Optimization. In Advances in Neural Information Processing Systems (NeurIPS '20, Vol. 33), H. Larochelle, M. Ranzato, R. Hadsell, M.F. Balcan, and H. Lin (Eds.). 14129--14142."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290975"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539040"},{"key":"e_1_3_2_1_58_1","volume-title":"2023. Divide and Conquer: Towards Better Embedding-based Retrieval for Recommender Systems From a Multi-task Perspective. arXiv preprint arXiv:2302.02657","author":"Zhang Yuan","year":"2023","unstructured":"Yuan Zhang, Xue Dong, Weijie Ding, Biao Li, Peng Jiang, and Kun Gai. 2023. Divide and Conquer: Towards Better Embedding-based Retrieval for Recommender Systems From a Multi-task Perspective. arXiv preprint arXiv:2302.02657 (2023)."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450125"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449835"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449788"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330668"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3336191.3371801"}],
"event":{"name":"SIGIR '23: The 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Taipei, Taiwan","acronym":"SIGIR '23"},"container-title":["Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3539618.3591636","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,19]],"date-time":"2024-07-19T11:25:39Z","timestamp":1721388339000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539618.3591636"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,18]]},"references-count":63,"alternative-id":["10.1145\/3539618.3591636","10.1145\/3539618"],"URL":"https:\/\/doi.org\/10.1145\/3539618.3591636","relation":{},"subject":[],"published":{"date-parts":[[2023,7,18]]},"assertion":[{"value":"2023-07-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}
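The record above is the standard envelope returned by the Crossref REST API (api.crossref.org) for this paper's DOI. A minimal sketch of retrieving and reading it, assuming the `requests` package is available; the `mailto` contact in the User-Agent header is a hypothetical placeholder following Crossref's polite-pool convention, and the field names used below are exactly those that appear in the record:

```python
# Minimal sketch: fetch this Crossref work record and pull out a few fields.
import requests

DOI = "10.1145/3539618.3591636"

resp = requests.get(
    f"https://api.crossref.org/works/{DOI}",
    # Polite-pool convention: identify yourself; the address is a placeholder.
    headers={"User-Agent": "example-script (mailto:you@example.org)"},
    timeout=30,
)
resp.raise_for_status()
body = resp.json()
# Envelope fields as seen in the record above.
assert body["status"] == "ok" and body["message-type"] == "work"

work = body["message"]
print(work["title"][0])                  # "title" is a list of strings
print(work["is-referenced-by-count"])    # citation count at deposit time
for a in work["author"]:
    print(f'{a["given"]} {a["family"]}')
# Reference entries are heterogeneous: some carry only a DOI,
# others only an "unstructured" free-text citation, as above.
for ref in work.get("reference", []):
    print(ref.get("DOI") or ref.get("unstructured", ""))
```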