{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T09:59:58Z","timestamp":1725789598088},"publisher-location":"New York, NY, USA","reference-count":20,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,12,15]]},"DOI":"10.1145\/3639233.3639332","type":"proceedings-article","created":{"date-parts":[[2024,3,5]],"date-time":"2024-03-05T16:02:10Z","timestamp":1709654530000},"page":"77-84","update-policy":"http:\/\/dx.doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Cantonese to Written Chinese Translation via HuggingFace Translation Pipeline"],"prefix":"10.1145","author":[{"ORCID":"http:\/\/orcid.org\/0000-0001-5683-968X","authenticated-orcid":false,"given":"Raptor Yick-Kan","family":"Kwok","sequence":"first","affiliation":[{"name":"School of Science and Technology, Hong Kong Metropolitan University, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0001-6523-9547","authenticated-orcid":false,"given":"Siu-Kei","family":"Au Yeung","sequence":"additional","affiliation":[{"name":"School of Science and Technology, Hong Kong Metropolitan University, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-1708-7099","authenticated-orcid":false,"given":"Zongxi","family":"Li","sequence":"additional","affiliation":[{"name":"School of Science and Technology, Hong Kong Metropolitan University, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-5421-7622","authenticated-orcid":false,"given":"Kevin","family":"Hung","sequence":"additional","affiliation":[{"name":"School of Science and Technology, Hong Kong Metropolitan University, China"}]}],"member":"320","published-online":{"date-parts":[[2024,3,5]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Yifan Wang, and Shibingfeng Zhang.","author":"Dare Megan","year":"2023","unstructured":"Megan Dare, Valentina\u00a0Fajardo Diaz, Averie Ho\u00a0Zoen So, Yifan Wang, and Shibingfeng Zhang. 2023. Unsupervised Mandarin-Cantonese Machine Translation. arxiv:2301.03971\u00a0[cs.CL]"},{"key":"e_1_3_2_1_2_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arxiv:1810.04805\u00a0[cs.CL]","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arxiv:1810.04805\u00a0[cs.CL]"},{"key":"e_1_3_2_1_3_1","unstructured":"John Gibbons. 1987. Code-mixing and code choice : a Hong Kong case study. Multilingual Matters Clevedon Avon England."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_1_5_1","volume-title":"Cambridge England: University Press.","author":"Yue\u00a0Hashimoto Oi","year":"1972","unstructured":"Oi kan Yue\u00a0Hashimoto. 1972. In Phonology of Cantonese. Cambridge England: University Press."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1910.13461"},{"key":"e_1_3_2_1_7_1","unstructured":"H. Liang. 2021. Cantonese Mandarin Translations Dataset. 
https:\/\/huggingface.co\/datasets\/botisan-ai\/cantonese-mandarin-translations"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W15-3049"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-4770"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-6319"},{"key":"e_1_3_2_1_12_1","unstructured":"Associated Press. 2022. As Cantonese language wanes efforts to preserve it grow. https:\/\/www.nbcnews.com\/news\/asian-america\/cantonese-language-wanes-efforts-grow-preserve-rcna49870. [Accessed 23-May-2023]."},{"key":"e_1_3_2_1_13_1","volume-title":"Language models are unsupervised multitask learners. OpenAI blog 1, 8","author":"Radford Alec","year":"2019","unstructured":"Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, Ilya Sutskever, 2019. Language models are unsupervised multitask learners. OpenAI blog 1, 8 (2019), 9."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.365"},{"key":"e_1_3_2_1_15_1","volume-title":"CPT: A Pre-Trained Unbalanced Transformer for Both Chinese Language Understanding and Generation. arXiv preprint arXiv:2109.05729","author":"Shao Yunfan","year":"2021","unstructured":"Yunfan Shao, Zhichao Geng, Yitao Liu, Junqi Dai, Fei Yang, Li Zhe, Hujun Bao, and Xipeng Qiu. 2021. CPT: A Pre-Trained Unbalanced Transformer for Both Chinese Language Understanding and Generation. arXiv preprint arXiv:2109.05729 (2021)."},{"volume-title":"Cantonese as Written Language: The Growth of a Written Chinese Vernacular","author":"Snow D.","key":"e_1_3_2_1_16_1","unstructured":"D. Snow. 2004. Cantonese as Written Language: The Growth of a Written Chinese Vernacular. Hong Kong University Press. https:\/\/books.google.com.hk\/books?id=pFnP_FXf-lAC"},{"key":"e_1_3_2_1_17_1","unstructured":"Ilya Sutskever Oriol Vinyals and Quoc\u00a0V. Le. 2014. Sequence to Sequence Learning with Neural Networks. arxiv:1409.3215\u00a0[cs.CL]"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Thomas Wolf Lysandre Debut Victor Sanh Julien Chaumond Clement Delangue Anthony Moi Pierric Cistac Tim Rault R\u00e9mi Louf Morgan Funtowicz Joe Davison Sam Shleifer Patrick von Platen Clara Ma Yacine Jernite Julien Plu Canwen Xu Teven\u00a0Le Scao Sylvain Gugger Mariama Drame Quentin Lhoest and Alexander\u00a0M Rush. 2020. HuggingFace\u2019s Transformers: State-of-the-art Natural Language Processing. arxiv:1910.03771\u00a0[cs.CL]","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"e_1_3_2_1_19_1","volume-title":"XLNet: Generalized Autoregressive Pretraining for Language Understanding. CoRR abs\/1906.08237","author":"Yang Zhilin","year":"2019","unstructured":"Zhilin Yang, Zihang Dai, Yiming Yang, Jaime\u00a0G. Carbonell, Ruslan Salakhutdinov, and Quoc\u00a0V. Le. 2019. XLNet: Generalized Autoregressive Pretraining for Language Understanding. CoRR abs\/1906.08237 (2019). 
arXiv:1906.08237http:\/\/arxiv.org\/abs\/1906.08237"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3508230.3508242"}],"event":{"name":"NLPIR 2023: 2023 7th International Conference on Natural Language Processing and Information Retrieval","acronym":"NLPIR 2023","location":"Seoul Republic of Korea"},"container-title":["Proceedings of the 2023 7th International Conference on Natural Language Processing and Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3639233.3639332","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,11]],"date-time":"2024-07-11T23:58:00Z","timestamp":1720742280000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3639233.3639332"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,15]]},"references-count":20,"alternative-id":["10.1145\/3639233.3639332","10.1145\/3639233"],"URL":"https:\/\/doi.org\/10.1145\/3639233.3639332","relation":{},"subject":[],"published":{"date-parts":[[2023,12,15]]},"assertion":[{"value":"2024-03-05","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}