{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T02:01:31Z","timestamp":1740103291235,"version":"3.37.3"},"publisher-location":"New York, NY, USA","reference-count":94,"publisher":"ACM","funder":[{"name":"MOE AcRF TIER 3 Grant","award":["MOE-MOET32022-0001"]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3680705","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:41Z","timestamp":1729925981000},"page":"7667-7676","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["PanoSent: A Panoptic Sextuple Extraction Benchmark for Multimodal Conversational Aspect-based Sentiment Analysis"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2274-5719","authenticated-orcid":false,"given":"Meng","family":"Luo","sequence":"first","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3026-6347","authenticated-orcid":false,"given":"Hao","family":"Fei","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0513-5540","authenticated-orcid":false,"given":"Bobo","family":"Li","sequence":"additional","affiliation":[{"name":"Wuhan University, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6192-1194","authenticated-orcid":false,"given":"Shengqiong","family":"Wu","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3162-935X","authenticated-orcid":false,"given":"Qian","family":"Liu","sequence":"additional","affiliation":[{"name":"The University of Auckland, Auckland, New Zealand"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6924-7931","authenticated-orcid":false,"given":"Soujanya","family":"Poria","sequence":"additional","affiliation":[{"name":"Singapore University of Technology and Design, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3030-1280","authenticated-orcid":false,"given":"Erik","family":"Cambria","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9636-388X","authenticated-orcid":false,"given":"Mong-Li","family":"Lee","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4142-8893","authenticated-orcid":false,"given":"Wynne","family":"Hsu","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00175"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2021.107134"},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the ACL. 440--447","author":"Blitzer John","year":"2007","unstructured":"John Blitzer, Mark Dredze, and Fernando Pereira. 2007. Biographies, Bollywood, Boom-boxes and Blenders: Domain Adaptation for Sentiment Classification. In Proceedings of the ACL. 440--447."},{"key":"e_1_3_2_1_5_1","volume-title":"IEMOCAP: Interactive emotional dyadic motion capture database. Language resources and evaluation","author":"Busso Carlos","year":"2008","unstructured":"Carlos Busso, Murtaza Bulut, Chi-Chun Lee, Abe Kazemzadeh, Emily Mower, Samuel Kim, Jeannette N Chang, Sungbok Lee, and Shrikanth S Narayanan. 2008. IEMOCAP: Interactive emotional dyadic motion capture database. Language resources and evaluation, Vol. 42 (2008), 335--359."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.29"},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the HCII.","author":"Cambria Erik","year":"2024","unstructured":"Erik Cambria, Xulang Zhang, Rui Mao, Melvin Chen, and Kenneth Kwok. 2024. SenticNet 8: Fusing emotion AI and commonsense AI for interpretable, trustworthy, and explainable affective computing. In Proceedings of the HCII."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSS.2019.2956957"},{"volume-title":"Proceedings of the ACL. 6515--6524","author":"Chen Shaowei","key":"e_1_3_2_1_9_1","unstructured":"Shaowei Chen, Jie Liu, Yu Wang, Wenzheng Zhang, and Ziming Chi. [n.,d.]. Synchronous Double-channel Recurrent Network for Aspect-Opinion Pair Extraction. In Proceedings of the ACL. 6515--6524."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Zhexue Chen Hong Huang Bang Liu Xuanhua Shi and Hai Jin. 2021. Semantic and Syntactic Enhanced Aspect Sentiment Triplet Extraction. In Findings of the ACL. 1474--1483.","DOI":"10.18653\/v1\/2021.findings-acl.128"},{"key":"e_1_3_2_1_11_1","unstructured":"Hyung Won Chung Le Hou Shayne Longpre Barret Zoph Yi Tay William Fedus Yunxuan Li Xuezhi Wang Mostafa Dehghani Siddhartha Brahma et al. 2022. Scaling instruction-finetuned language models. arXiv preprint arXiv:2210.11416 (2022)."},{"key":"e_1_3_2_1_12_1","volume-title":"A coefficient of agreement for nominal scales. Educational and psychological measurement","author":"Cohen Jacob","year":"1960","unstructured":"Jacob Cohen. 1960. A coefficient of agreement for nominal scales. Educational and psychological measurement, Vol. 20, 1 (1960), 37--46."},{"key":"e_1_3_2_1_13_1","volume-title":"Audio retrieval with wavtext5k and clap training. arXiv preprint arXiv:2209.14275","author":"Deshmukh Soham","year":"2022","unstructured":"Soham Deshmukh, Benjamin Elizalde, and Huaming Wang. 2022. Audio retrieval with wavtext5k and clap training. arXiv preprint arXiv:2209.14275 (2022)."},{"key":"e_1_3_2_1_14_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_15_1","volume-title":"Enhancing chat language models by scaling high-quality instructional conversations. arXiv preprint arXiv:2305.14233","author":"Ding Ning","year":"2023","unstructured":"Ning Ding, Yulin Chen, Bokai Xu, Yujia Qin, Zhi Zheng, Shengding Hu, Zhiyuan Liu, Maosong Sun, and Bowen Zhou. 2023. Enhancing chat language models by scaling high-quality instructional conversations. arXiv preprint arXiv:2305.14233 (2023)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1259"},{"key":"e_1_3_2_1_17_1","volume-title":"Reasoning implicit sentiment with chain-of-thought prompting. arXiv preprint arXiv:2305.11255","author":"Fei Hao","year":"2023","unstructured":"Hao Fei, Bobo Li, Qian Liu, Lidong Bing, Fei Li, and Tat-Seng Chua. 2023. Reasoning implicit sentiment with chain-of-thought prompting. arXiv preprint arXiv:2305.11255 (2023)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Hao Fei Fei Li Chenliang Li Shengqiong Wu Jingye Li and Donghong Ji. 2022. Inheriting the Wisdom of Predecessors: A Multiplex Cascade Framework for Unified Aspect-based Sentiment Analysis.. In IJCAI. 4121--4128.","DOI":"10.24963\/ijcai.2022\/572"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.329"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00730"},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the International Conference on Machine Learning. 6373--6391","author":"Fei Hao","year":"2022","unstructured":"Hao Fei, Shengqiong Wu, Yafeng Ren, and Meishan Zhang. 2022. Matching structure for dual learning. In Proceedings of the International Conference on Machine Learning. 6373--6391."},{"key":"e_1_3_2_1_22_1","volume-title":"VITRON: A Unified Pixel-level Vision LLM for Understanding, Generating, Segmenting, Editing.","author":"Fei Hao","year":"2024","unstructured":"Hao Fei, Shengqiong Wu, Hanwang Zhang, Tat-Seng Chua, and Shuicheng Yan. 2024. VITRON: A Unified Pixel-level Vision LLM for Understanding, Generating, Segmenting, Editing. (2024)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3393452"},{"key":"e_1_3_2_1_24_1","volume-title":"2024 d. EmpathyEar: An Open-source Avatar Multimodal Empathetic Chatbot. arXiv preprint arXiv:2406.15177","author":"Fei Hao","year":"2024","unstructured":"Hao Fei, Han Zhang, Bin Wang, Lizi Liao, Qian Liu, and Erik Cambria. 2024 d. EmpathyEar: An Open-source Avatar Multimodal Empathetic Chatbot. arXiv preprint arXiv:2406.15177 (2024)."},{"key":"e_1_3_2_1_25_1","volume-title":"Dylan Freedman, Aren Jansen, Wade Lawrence, R Channing Moore, Manoj Plakal, and Marvin Ritter.","author":"Gemmeke Jort F","year":"2017","unstructured":"Jort F Gemmeke, Daniel PW Ellis, Dylan Freedman, Aren Jansen, Wade Lawrence, R Channing Moore, Manoj Plakal, and Marvin Ritter. 2017. Audio set: An ontology and human-labeled dataset for audio events. In IEEE ICASSP. IEEE, 776--780."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01457"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747397"},{"key":"e_1_3_2_1_28_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.534"},{"key":"e_1_3_2_1_30_1","volume-title":"Mmgcn: Multimodal fusion via deep graph convolution network for emotion recognition in conversation. arXiv preprint arXiv:2107.06779","author":"Hu Jingwen","year":"2021","unstructured":"Jingwen Hu, Yuchen Liu, Jinming Zhao, and Qin Jin. 2021. Mmgcn: Multimodal fusion via deep graph convolution network for emotion recognition in conversation. arXiv preprint arXiv:2107.06779 (2021)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Lei Huang Weijiang Yu Weitao Ma Weihong Zhong Zhangyin Feng Haotian Wang Qianglong Chen Weihua Peng Xiaocheng Feng Bing Qin et al. 2023. A survey on hallucination in large language models: Principles taxonomy challenges and open questions. arXiv preprint arXiv:2311.05232 (2023).","DOI":"10.1145\/3703155"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3183434"},{"key":"e_1_3_2_1_33_1","volume-title":"Proceedings of the ACL. 151--160","author":"Jiang Long","year":"2011","unstructured":"Long Jiang, Mo Yu, Ming Zhou, Xiaohua Liu, and Tiejun Zhao. 2011. Target-dependent Twitter Sentiment Classification. In Proceedings of the ACL. 151--160."},{"key":"e_1_3_2_1_34_1","volume-title":"Juan Diego Rodriguez, and Greg Durrett","author":"Kamoi Ryo","year":"2023","unstructured":"Ryo Kamoi, Tanya Goyal, Juan Diego Rodriguez, and Greg Durrett. 2023. Wice: Real-world entailment for claims in wikipedia. arXiv preprint arXiv:2303.01432 (2023)."},{"key":"e_1_3_2_1_35_1","volume-title":"Shared and private information learning in multimodal sentiment analysis with deep modal alignment and self-supervised multi-task learning. arXiv preprint arXiv:2305.08473","author":"Lai Songning","year":"2023","unstructured":"Songning Lai, Xifeng Hu, Yulong Li, Zhaoxia Ren, Zhi Liu, and Danmin Miao. 2023. Shared and private information learning in multimodal sentiment analysis with deep modal alignment and self-supervised multi-task learning. arXiv preprint arXiv:2305.08473 (2023)."},{"key":"e_1_3_2_1_36_1","volume-title":"Learning to Reduce: Optimal Representations of Structured Data in Prompting Large Language Models. arXiv preprint arXiv:2402.14195","author":"Lee Younghun","year":"2024","unstructured":"Younghun Lee, Sungchul Kim, Tong Yu, Ryan A Rossi, and Xiang Chen. 2024. Learning to Reduce: Optimal Representations of Structured Data in Prompting Large Language Models. arXiv preprint arXiv:2402.14195 (2024)."},{"key":"e_1_3_2_1_37_1","unstructured":"Bobo Li Hao Fei Fei Li Yuhan Wu Jinsong Zhang Shengqiong Wu Jingye Li Yijiang Liu Lizi Liao Tat-Seng Chua and Donghong Ji. 2023. DiaASQ: A Benchmark of Conversational Aspect-based Sentiment Quadruple Analysis. In Findings of the ACL. 13449--13467."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612053"},{"key":"e_1_3_2_1_39_1","volume-title":"Mimic-it: Multi-modal in-context instruction tuning. arXiv preprint arXiv:2306.05425","author":"Li Bo","year":"2023","unstructured":"Bo Li, Yuanhan Zhang, Liangyu Chen, Jinghao Wang, Fanyi Pu, Jingkang Yang, Chunyuan Li, and Ziwei Liu. 2023. Mimic-it: Multi-modal in-context instruction tuning. arXiv preprint arXiv:2306.05425 (2023)."},{"key":"e_1_3_2_1_40_1","volume-title":"Fine-grained semantically aligned vision-language pre-training. Advances in neural information processing systems","author":"Li Juncheng","year":"2022","unstructured":"Juncheng Li, Xin He, Longhui Wei, Long Qian, Linchao Zhu, Lingxi Xie, Yueting Zhuang, Qi Tian, and Siliang Tang. 2022. Fine-grained semantically aligned vision-language pre-training. Advances in neural information processing systems, Vol. 35 (2022), 7290--7303."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"Ziming Li Yan Zhou Yaxin Liu Fuqing Zhu Chuanpeng Yang and Songlin Hu. 2023 d. QAP: A Quantum-Inspired Adaptive-Priority-Learning Model for Multimodal Emotion Recognition. In Findings of the ACL. 12191--12204.","DOI":"10.18653\/v1\/2023.findings-acl.772"},{"key":"e_1_3_2_1_42_1","volume-title":"Gpt-4v with emotion: A zero-shot benchmark for multimodal emotion understanding. arXiv preprint arXiv:2312.04293","author":"Lian Zheng","year":"2023","unstructured":"Zheng Lian, Licai Sun, Haiyang Sun, Kang Chen, Zhuofan Wen, Hao Gu, Shun Chen, Bin Liu, and Jianhua Tao. 2023. Gpt-4v with emotion: A zero-shot benchmark for multimodal emotion understanding. arXiv preprint arXiv:2312.04293 (2023)."},{"key":"e_1_3_2_1_43_1","volume-title":"Aspect-based sentiment analysis via affective knowledge enhanced graph convolutional networks. Knowledge-Based Systems","author":"Liang Bin","year":"2022","unstructured":"Bin Liang, Hang Su, Lin Gui, Erik Cambria, and Ruifeng Xu. 2022. Aspect-based sentiment analysis via affective knowledge enhanced graph convolutional networks. Knowledge-Based Systems (2022), 107643."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","unstructured":"Tsung-Yi Lin Michael Maire Serge Belongie James Hays Pietro Perona Deva Ramanan Piotr Doll\u00e1r and C Lawrence Zitnick. 2014. Microsoft coco: Common objects in context. In Procedding of the ECCV. 740--755.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.152"},{"key":"e_1_3_2_1_46_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems","author":"Liu Haotian","year":"2024","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2024. Visual instruction tuning. Advances in neural information processing systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1168"},{"key":"e_1_3_2_1_48_1","volume-title":"Unified-io 2: Scaling autoregressive multimodal models with vision, language, audio, and action. arXiv preprint arXiv:2312.17172","author":"Lu Jiasen","year":"2023","unstructured":"Jiasen Lu, Christopher Clark, Sangho Lee, Zichen Zhang, Savya Khosla, Ryan Marten, Derek Hoiem, and Aniruddha Kembhavi. 2023. Unified-io 2: Scaling autoregressive multimodal models with vision, language, audio, and action. arXiv preprint arXiv:2312.17172 (2023)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.semeval-1.226"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1066"},{"key":"e_1_3_2_1_51_1","volume-title":"Multimodal sentiment analysis using hierarchical fusion with context modeling. Knowledge-based systems","author":"Majumder Navonil","year":"2018","unstructured":"Navonil Majumder, Devamanyu Hazarika, Alexander Gelbukh, Erik Cambria, and Soujanya Poria. 2018. Multimodal sentiment analysis using hierarchical fusion with context modeling. Knowledge-based systems, Vol. 161 (2018), 124--133."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i15.17597"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.challengehml-1.2"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.asej.2014.04.011"},{"key":"e_1_3_2_1_55_1","first-page":"601","article-title":"A review on sentiment analysis methodologies, practices and applications","volume":"9","author":"Mehta Pooja","year":"2020","unstructured":"Pooja Mehta and Sharnil Pandya. 2020. A review on sentiment analysis methodologies, practices and applications. International Journal of Scientific and Technology Research, Vol. 9, 2 (2020), 601--609.","journal-title":"International Journal of Scientific and Technology Research"},{"key":"e_1_3_2_1_56_1","volume-title":"Orca: Progressive learning from complex explanation traces of gpt-4. arXiv preprint arXiv:2306.02707","author":"Mukherjee Subhabrata","year":"2023","unstructured":"Subhabrata Mukherjee, Arindam Mitra, Ganesh Jawahar, Sahaj Agarwal, Hamid Palangi, and Ahmed Awadallah. 2023. Orca: Progressive learning from complex explanation traces of gpt-4. arXiv preprint arXiv:2306.02707 (2023)."},{"key":"e_1_3_2_1_57_1","volume-title":"A review on sentiment analysis and emotion detection from text. Social network analysis and mining","author":"Nandwani Pansy","year":"2021","unstructured":"Pansy Nandwani and Rupali Verma. 2021. A review on sentiment analysis and emotion detection from text. Social network analysis and mining, Vol. 11, 1 (2021), 81."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/945645.945658"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2020.2970399"},{"key":"e_1_3_2_1_60_1","volume-title":"Instruction tuning with gpt-4. arXiv preprint arXiv:2304.03277","author":"Peng Baolin","year":"2023","unstructured":"Baolin Peng, Chunyuan Li, Pengcheng He, Michel Galley, and Jianfeng Gao. 2023. Instruction tuning with gpt-4. arXiv preprint arXiv:2304.03277 (2023)."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6383"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/S14-2004"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1050"},{"key":"e_1_3_2_1_64_1","volume-title":"How Easy is It to Fool Your Multimodal LLMs? An Empirical Analysis on Deceptive Prompts. arXiv preprint arXiv:2402.13220","author":"Qian Yusu","year":"2024","unstructured":"Yusu Qian, Haotian Zhang, Yinfei Yang, and Zhe Gan. 2024. How Easy is It to Fool Your Multimodal LLMs? An Empirical Analysis on Deceptive Prompts. arXiv preprint arXiv:2402.13220 (2024)."},{"key":"e_1_3_2_1_65_1","volume-title":"Minds versus Machines: Rethinking Entailment Verification with Language Models. arXiv preprint arXiv:2402.03686","author":"Sanyal Soumya","year":"2024","unstructured":"Soumya Sanyal, Tianyi Xiao, Jiacheng Liu, Wenya Wang, and Xiang Ren. 2024. Minds versus Machines: Rethinking Entailment Verification with Language Models. arXiv preprint arXiv:2402.03686 (2024)."},{"key":"e_1_3_2_1_66_1","volume-title":"Survey on aspect-level sentiment analysis","author":"Schouten Kim","year":"2015","unstructured":"Kim Schouten and Flavius Frasincar. 2015. Survey on aspect-level sentiment analysis. IEEE transactions on knowledge and data engineering, Vol. 28, 3 (2015), 813--830."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.824"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2017.08.003"},{"key":"e_1_3_2_1_69_1","volume-title":"Make a choice! knowledge base question answering with in-context learning. arXiv preprint arXiv:2305.13972","author":"Tan Chuanyuan","year":"2023","unstructured":"Chuanyuan Tan, Yuehe Chen, Wenbiao Shao, and Wenliang Chen. 2023. Make a choice! knowledge base question answering with in-context learning. arXiv preprint arXiv:2305.13972 (2023)."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1167"},{"key":"e_1_3_2_1_71_1","volume-title":"Thumbs up or thumbs down? Semantic orientation applied to unsupervised classification of reviews. arXiv preprint cs\/0212032","author":"Turney Peter D","year":"2002","unstructured":"Peter D Turney. 2002. Thumbs up or thumbs down? Semantic orientation applied to unsupervised classification of reviews. arXiv preprint cs\/0212032 (2002)."},{"key":"e_1_3_2_1_72_1","volume-title":"Denny Zhou, et al.","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le, Denny Zhou, et al. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems, Vol. 35 (2022), 24824--24837."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.823"},{"key":"e_1_3_2_1_74_1","volume-title":"Towards Semantic Equivalence of Tokenization in Multimodal LLM. arXiv preprint arXiv:2406.05127","author":"Wu Shengqiong","year":"2024","unstructured":"Shengqiong Wu, Hao Fei, Xiangtai Li, Jiayi Ji, Hanwang Zhang, Tat-Seng Chua, and Shuicheng Yan. 2024. Towards Semantic Equivalence of Tokenization in Multimodal LLM. arXiv preprint arXiv:2406.05127 (2024)."},{"key":"e_1_3_2_1_75_1","volume-title":"Proceedings of the International Conference on Machine Learning.","author":"Wu Shengqiong","year":"2024","unstructured":"Shengqiong Wu, Hao Fei, Leigang Qu, Wei Ji, and Tat-Seng Chua. 2024. NExT-GPT: Any-to-Any Multimodal LLM. In Proceedings of the International Conference on Machine Learning."},{"key":"e_1_3_2_1_76_1","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems. 79240--79259","author":"Wu Shengqiong","year":"2023","unstructured":"Shengqiong Wu, Hao Fei, Hanwang Zhang, and Tat-Seng Chua. 2023. Imagine that! abstract-to-intricate text-to-image synthesis with scene graph hallucination diffusion. In Proceedings of the 37th International Conference on Neural Information Processing Systems. 79240--79259."},{"key":"e_1_3_2_1_77_1","unstructured":"Zhen Wu Chengcan Ying Fei Zhao Zhifang Fan Xinyu Dai and Rui Xia. 2020. Grid Tagging Scheme for Aspect-oriented Fine-grained Opinion Extraction. In Findings of the ACL. 2576--2585."},{"key":"e_1_3_2_1_78_1","volume-title":"Wizardlm: Empowering large language models to follow complex instructions. arXiv preprint arXiv:2304.12244","author":"Xu Can","year":"2023","unstructured":"Can Xu, Qingfeng Sun, Kai Zheng, Xiubo Geng, Pu Zhao, Jiazhan Feng, Chongyang Tao, and Daxin Jiang. 2023. Wizardlm: Empowering large language models to follow complex instructions. arXiv preprint arXiv:2304.12244 (2023)."},{"key":"e_1_3_2_1_79_1","volume-title":"mT5: A massively multilingual pre-trained text-to-text transformer. arXiv preprint arXiv:2010.11934","author":"Xue Linting","year":"2020","unstructured":"Linting Xue, Noah Constant, Adam Roberts, Mihir Kale, Rami Al-Rfou, Aditya Siddhant, Aditya Barua, and Colin Raffel. 2020. mT5: A massively multilingual pre-trained text-to-text transformer. arXiv preprint arXiv:2010.11934 (2020)."},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.188"},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.421"},{"key":"e_1_3_2_1_82_1","volume-title":"A survey on multimodal large language models. arXiv preprint arXiv:2306.13549","author":"Yin Shukang","year":"2023","unstructured":"Shukang Yin, Chaoyou Fu, Sirui Zhao, Ke Li, Xing Sun, Tong Xu, and Enhong Chen. 2023. A survey on multimodal large language models. arXiv preprint arXiv:2306.13549 (2023)."},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","DOI":"10.3115\/1119355.1119372"},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2957872"},{"key":"e_1_3_2_1_86_1","volume-title":"Soujanya Poria, Erik Cambria, and Louis-Philippe Morency.","author":"Bagher Zadeh AmirAli","year":"2018","unstructured":"AmirAli Bagher Zadeh, Paul Pu Liang, Soujanya Poria, Erik Cambria, and Louis-Philippe Morency. 2018. Multimodal language analysis in the wild: Carnegie Mellon University-MOSEI dataset and interpretable dynamic fusion graph. In Proceedings of the ACL. 2236--2246."},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.408"},{"key":"e_1_3_2_1_88_1","volume-title":"Mm-llms: Recent advances in multimodal large language models. arXiv preprint arXiv:2401.13601","author":"Zhang Duzhen","year":"2024","unstructured":"Duzhen Zhang, Yahan Yu, Chenxing Li, Jiahua Dong, Dan Su, Chenhui Chu, and Dong Yu. 2024. Mm-llms: Recent advances in multimodal large language models. arXiv preprint arXiv:2401.13601 (2024)."},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.726"},{"key":"e_1_3_2_1_90_1","volume-title":"A survey on aspect-based sentiment analysis: Tasks, methods, and challenges","author":"Zhang Wenxuan","year":"2022","unstructured":"Wenxuan Zhang, Xin Li, Yang Deng, Lidong Bing, and Wai Lam. 2022. A survey on aspect-based sentiment analysis: Tasks, methods, and challenges. IEEE Transactions on Knowledge and Data Engineering (2022)."},{"key":"e_1_3_2_1_91_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.732"},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.391"},{"key":"e_1_3_2_1_93_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612096"},{"key":"e_1_3_2_1_94_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.861"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3680705","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T09:03:26Z","timestamp":1732957406000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680705"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":94,"alternative-id":["10.1145\/3664647.3680705","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3680705","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}