{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,4]],"date-time":"2025-05-04T04:03:56Z","timestamp":1746331436325,"version":"3.40.4"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","funder":[{"name":"Beijing Natural Science Foundation","award":["4222027"]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62222215"],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,10]]},"DOI":"10.1145\/3626772.3657805","type":"proceedings-article","created":{"date-parts":[[2024,7,11]],"date-time":"2024-07-11T16:40:05Z","timestamp":1720716005000},"page":"1619-1628","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["EulerFormer: Sequential User Behavior Modeling with Complex Vector Attention"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5569-2591","authenticated-orcid":false,"given":"Zhen","family":"Tian","sequence":"first","affiliation":[{"name":"GSAI, Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8333-6196","authenticated-orcid":false,"given":"Wayne Xin","family":"Zhao","sequence":"additional","affiliation":[{"name":"GSAI, Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4193-7833","authenticated-orcid":false,"given":"Changwang","family":"Zhang","sequence":"additional","affiliation":[{"name":"Poisson Lab, Huawei, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6524-1756","authenticated-orcid":false,"given":"Xin","family":"Zhao","sequence":"additional","affiliation":[{"name":"Poisson Lab, Huawei, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5907-1607","authenticated-orcid":false,"given":"Zhongrui","family":"Ma","sequence":"additional","affiliation":[{"name":"Poisson Lab, Huawei, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9777-9676","authenticated-orcid":false,"given":"Ji-Rong","family":"Wen","sequence":"additional","affiliation":[{"name":"GSAI, Renmin University of China, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,7,11]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Yin-Wen Chang, and Chun-Sung Ferng.","author":"Chen Pu-Chin","year":"2021","unstructured":"Pu-Chin Chen, Henry Tsai, Srinadh Bhojanapalli, Hyung Won Chung, Yin-Wen Chang, and Chun-Sung Ferng. 2021. A simple and effective positional encoding for transformers. arXiv preprint arXiv:2104.08698 (2021)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3326937.3341261"},{"key":"e_1_3_2_1_3_1","first-page":"1","article-title":"Palm: Scaling language modeling with pathways","volume":"24","author":"Chowdhery Aakanksha","year":"2023","unstructured":"Aakanksha Chowdhery, Sharan Narang, Jacob Devlin, Maarten Bosma, Gaurav Mishra, Adam Roberts, Paul Barham, Hyung Won Chung, Charles Sutton, Sebastian Gehrmann, et al. 2023. Palm: Scaling language modeling with pathways. Journal of Machine Learning Research, Vol. 
24, 240 (2023), 1--113.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_4_1","volume-title":"Transformer-xl: Attentive language models beyond a fixed-length context. arXiv preprint arXiv:1901.02860","author":"Dai Zihang","year":"2019","unstructured":"Zihang Dai, Zhilin Yang, Yiming Yang, Jaime Carbonell, Quoc V Le, and Ruslan Salakhutdinov. 2019. Transformer-xl: Attentive language models beyond a fixed-length context. arXiv preprint arXiv:1901.02860 (2019)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531931"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462978"},{"key":"e_1_3_2_1_7_1","volume-title":"Deberta: Decoding-enhanced bert with disentangled attention. arXiv preprint arXiv:2006.03654","author":"He Pengcheng","year":"2020","unstructured":"Pengcheng He, Xiaodong Liu, Jianfeng Gao, and Weizhu Chen. 2020. Deberta: Decoding-enhanced bert with disentangled attention. arXiv preprint arXiv:2006.03654 (2020)."},{"key":"e_1_3_2_1_8_1","volume-title":"Translation-based Recommendation: A Scalable Method for Modeling Sequential Behavior.. In IJCAI. 5264--5268.","author":"He Ruining","year":"2018","unstructured":"Ruining He, Wang-Cheng Kang, Julian J McAuley, et al. 2018. Translation-based Recommendation: A Scalable Method for Modeling Sequential Behavior.. In IJCAI. 5264--5268."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583434"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531955"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539381"},{"key":"e_1_3_2_1_12_1","volume-title":"Improve transformer models with better relative position embeddings. arXiv preprint arXiv:2009.13658","author":"Huang Zhiheng","year":"2020","unstructured":"Zhiheng Huang, Davis Liang, Peng Xu, and Bing Xiang. 2020. Improve transformer models with better relative position embeddings. arXiv preprint arXiv:2009.13658 (2020)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2018.00035"},{"key":"e_1_3_2_1_14_1","volume-title":"Rethinking positional encoding in language pre-training. arXiv preprint arXiv:2006.15595","author":"Ke Guolin","year":"2020","unstructured":"Guolin Ke, Di He, and Tie-Yan Liu. 2020. Rethinking positional encoding in language pre-training. arXiv preprint arXiv:2006.15595 (2020)."},{"key":"e_1_3_2_1_15_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_16_1","volume-title":"On the sentence embeddings from pre-trained language models. arXiv preprint arXiv:2011.05864","author":"Li Bohan","year":"2020","unstructured":"Bohan Li, Hao Zhou, Junxian He, Mingxuan Wang, Yiming Yang, and Lei Li. 2020. On the sentence embeddings from pre-trained language models. arXiv preprint arXiv:2011.05864 (2020)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512104"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591717"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583467"},{"key":"e_1_3_2_1_20_1","volume-title":"Yarn: Efficient context window extension of large language models. 
arXiv preprint arXiv:2309.00071","author":"Peng Bowen","year":"2023","unstructured":"Bowen Peng, Jeffrey Quesnelle, Honglu Fan, and Enrico Shippole. 2023. Yarn: Efficient context window extension of large language models. arXiv preprint arXiv:2309.00071 (2023)."},{"key":"e_1_3_2_1_21_1","volume-title":"Train short, test long: Attention with linear biases enables input length extrapolation. arXiv preprint arXiv:2108.12409","author":"Press Ofir","year":"2021","unstructured":"Ofir Press, Noah A Smith, and Mike Lewis. 2021. Train short, test long: Attention with linear biases enables input length extrapolation. arXiv preprint arXiv:2108.12409 (2021)."},{"key":"e_1_3_2_1_22_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans Ilya Sutskever et al. 2018. Improving language understanding by generative pre-training. (2018)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.127063"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357895"},{"key":"e_1_3_2_1_26_1","volume-title":"A length-extrapolatable transformer. arXiv preprint arXiv:2212.10554","author":"Sun Yutao","year":"2022","unstructured":"Yutao Sun, Li Dong, Barun Patra, Shuming Ma, Shaohan Huang, Alon Benhaim, Vishrav Chaudhary, Xia Song, and Furu Wei. 2022. A length-extrapolatable transformer. arXiv preprint arXiv:2212.10554 (2022)."},{"volume-title":"The World Wide Web Conference. 1782--1793","author":"Tang Jiaxi","key":"e_1_3_2_1_27_1","unstructured":"Jiaxi Tang, Francois Belletti, Sagar Jain, Minmin Chen, Alex Beutel, Can Xu, and Ed H. Chi. 2019. Towards neural mixture recommender for long range dependent user sequences. In The World Wide Web Conference. 1782--1793."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591681"},{"key":"e_1_3_2_1_29_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_1_30_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/2766462.2767694"},{"key":"e_1_3_2_1_32_1","volume-title":"Sequential recommender systems: challenges, progress and prospects. arXiv preprint arXiv:2001.04830","author":"Wang Shoujin","year":"2019","unstructured":"Shoujin Wang, Liang Hu, Yan Wang, Longbing Cao, Quan Z Sheng, and Mehmet Orgun. 2019. Sequential recommender systems: challenges, progress and prospects. arXiv preprint arXiv:2001.04830 (2019)."},{"volume-title":"ITM web of conferences","author":"W\u0119glarczyk Stanis\u0142aw","key":"e_1_3_2_1_33_1","unstructured":"Stanis\u0142aw W\u0119glarczyk. 2018. Kernel density estimation and its application. In ITM web of conferences, Vol. 23. 
EDP Sciences, 00037."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3412258"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE53745.2022.00099"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591889"},{"key":"e_1_3_2_1_37_1","volume-title":"Xlnet: Generalized autoregressive pretraining for language understanding. Advances in neural information processing systems","author":"Yang Zhilin","year":"2019","unstructured":"Zhilin Yang, Zihang Dai, Yiming Yang, Jaime Carbonell, Russ R Salakhutdinov, and Quoc V Le. 2019. Xlnet: Generalized autoregressive pretraining for language understanding. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_38_1","volume-title":"Graph contrastive learning with augmentations. Advances in neural information processing systems","author":"You Yuning","year":"2020","unstructured":"Yuning You, Tianlong Chen, Yongduo Sui, Ting Chen, Zhangyang Wang, and Yang Shen. 2020. Graph contrastive learning with augmentations. Advances in neural information processing systems, Vol. 33 (2020), 5812--5823."},{"key":"e_1_3_2_1_39_1","volume-title":"Next item recommendation with self-attention. arXiv preprint arXiv:1808.06414","author":"Zhang Shuai","year":"2018","unstructured":"Shuai Zhang, Yi Tay, Lina Yao, and Aixin Sun. 2018. Next item recommendation with self-attention. arXiv preprint arXiv:1808.06414 (2018)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462908"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"Tingting Zhang Pengpeng Zhao Yanchi Liu Victor S Sheng Jiajie Xu Deqing Wang Guanfeng Liu Xiaofang Zhou et al. 2019. Feature-level Deeper Self-Attention Network for Sequential Recommendation.. In IJCAI. 4320--4326.","DOI":"10.24963\/ijcai.2019\/600"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557680"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482016"},{"key":"e_1_3_2_1_44_1","unstructured":"Wayne Xin Zhao Kun Zhou Junyi Li Tianyi Tang Xiaolei Wang Yupeng Hou Yingqian Min Beichen Zhang Junjie Zhang Zican Dong et al. 2023. A survey of large language models. arXiv preprint arXiv:2303.18223 (2023)."},{"key":"e_1_3_2_1_45_1","volume-title":"Rethinking positional encoding. arXiv preprint arXiv:2107.02561","author":"Zheng Jianqiao","year":"2021","unstructured":"Jianqiao Zheng, Sameera Ramasinghe, and Simon Lucey. 2021. Rethinking positional encoding. 
arXiv preprint arXiv:2107.02561 (2021)."}],"event":{"name":"SIGIR 2024: The 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Washington DC USA","acronym":"SIGIR 2024"},"container-title":["Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626772.3657805","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,3]],"date-time":"2025-05-03T11:07:48Z","timestamp":1746270468000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657805"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,10]]},"references-count":45,"alternative-id":["10.1145\/3626772.3657805","10.1145\/3626772"],"URL":"https:\/\/doi.org\/10.1145\/3626772.3657805","relation":{},"subject":[],"published":{"date-parts":[[2024,7,10]]},"assertion":[{"value":"2024-07-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}
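
The record above is a standard Crossref works API response. As a minimal sketch in Python (not part of the record itself), assuming only the public Crossref REST endpoint https://api.crossref.org/works/{DOI} and the field names visible above ("message", "title", "author", "references-count"), the same record can be fetched and a few fields read back like this:

import json
import urllib.request

# DOI taken from the record above; the endpoint pattern is the public Crossref REST API.
DOI = "10.1145/3626772.3657805"
url = "https://api.crossref.org/works/" + DOI

with urllib.request.urlopen(url) as resp:
    work = json.load(resp)["message"]  # the payload sits under "message", as in the record above

print(work["title"][0])                # EulerFormer: Sequential User Behavior Modeling with Complex Vector Attention
print(", ".join(a["given"] + " " + a["family"] for a in work["author"]))
print(work["references-count"])        # 45 for this record

Everything printed comes straight from fields shown in the JSON above; only the endpoint URL is assumed beyond the record itself.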