{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T05:12:54Z","timestamp":1732684374741,"version":"3.28.2"},"reference-count":138,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2024,8,22]],"date-time":"2024-08-22T00:00:00Z","timestamp":1724284800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,8,22]],"date-time":"2024-08-22T00:00:00Z","timestamp":1724284800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nat Mach Intell"],"DOI":"10.1038\/s42256-024-00881-z","type":"journal-article","created":{"date-parts":[[2024,8,23]],"date-time":"2024-08-23T13:50:40Z","timestamp":1724421040000},"page":"852-863","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Factuality challenges in the era of large language models and opportunities for fact-checking"],"prefix":"10.1038","volume":"6","author":[{"ORCID":"http:\/\/orcid.org\/0000-0003-1562-7909","authenticated-orcid":false,"given":"Isabelle","family":"Augenstein","sequence":"first","affiliation":[]},{"given":"Timothy","family":"Baldwin","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0003-4085-9648","authenticated-orcid":false,"given":"Meeyoung","family":"Cha","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-0210-0369","authenticated-orcid":false,"given":"Tanmoy","family":"Chakraborty","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0001-5354-9257","authenticated-orcid":false,"given":"Giovanni 
Luca","family":"Ciampaglia","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-0651-9028","authenticated-orcid":false,"given":"David","family":"Corney","sequence":"additional","affiliation":[]},{"given":"Renee","family":"DiResta","sequence":"additional","affiliation":[]},{"given":"Emilio","family":"Ferrara","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-6894-4951","authenticated-orcid":false,"given":"Scott","family":"Hale","sequence":"additional","affiliation":[]},{"given":"Alon","family":"Halevy","sequence":"additional","affiliation":[]},{"given":"Eduard","family":"Hovy","sequence":"additional","affiliation":[]},{"given":"Heng","family":"Ji","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0003-4384-2876","authenticated-orcid":false,"given":"Filippo","family":"Menczer","sequence":"additional","affiliation":[]},{"given":"Ruben","family":"Miguez","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-3600-1510","authenticated-orcid":false,"given":"Preslav","family":"Nakov","sequence":"additional","affiliation":[]},{"given":"Dietram","family":"Scheufele","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0009-0006-0732-5870","authenticated-orcid":false,"given":"Shivam","family":"Sharma","sequence":"additional","affiliation":[]},{"given":"Giovanni","family":"Zagni","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,22]]},"reference":[{"key":"881_CR1","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1002\/j.1538-7305.1948.tb01338.x","volume":"27","author":"CE Shannon","year":"1948","unstructured":"Shannon, C. E. A mathematical theory of communication. Bell Syst. Techn. J. 27, 379\u2013423 (1948).","journal-title":"Bell Syst. Techn. J."},{"key":"881_CR2","doi-asserted-by":"crossref","unstructured":"Wang, Y. et al. M4GT-Bench: evaluation benchmark for black-box machine-generated text detection. In Proc. 62nd Annual Meeting of the Association for Computational Linguistics (Long Papers) (2023).","DOI":"10.18653\/v1\/2024.acl-long.218"},{"key":"881_CR3","doi-asserted-by":"crossref","unstructured":"Huang, J. & Chang, K. C.-C. Towards reasoning in large language models: a survey. In Findings of the Association for Computational Linguistics 1049\u20131065 (ACL, 2023).","DOI":"10.18653\/v1\/2023.findings-acl.67"},{"key":"881_CR4","unstructured":"Radford, A. et al. Improving language understanding by generative pre-training. OpenAI https:\/\/cdn.openai.com\/research-covers\/language-unsupervised\/language_understanding_paper.pdf (2018)."},{"key":"881_CR5","unstructured":"OpenAI. GPT-4 technical report. Preprint at https:\/\/arxiv.org\/abs\/2303.08774 (2023)."},{"key":"881_CR6","doi-asserted-by":"publisher","unstructured":"Llama Team, AI@Meta. The Llama3 Herd of Models. arXiv https:\/\/doi.org\/10.48550\/arXiv.2407.21783 (2024).","DOI":"10.48550\/arXiv.2407.21783"},{"key":"881_CR7","unstructured":"Zhao, W. X. et al. A survey of large language models. Preprint at https:\/\/arxiv.org\/abs\/2303.18223 (2023)."},{"key":"881_CR8","doi-asserted-by":"crossref","unstructured":"Bang, Y. et al. A multitask, multilingual, multimodal evaluation of ChatGPT on reasoning, hallucination, and interactivity. In Proc. 13th International Joint Conference on Natural Language Processing and 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics Vol. 1 (eds Park, J. C. et al.) 
675\u2013718 (ACL, 2023).","DOI":"10.18653\/v1\/2023.ijcnlp-main.45"},{"key":"881_CR9","unstructured":"Bergstrom, C. T. & Ogbunu, C. B. ChatGPT isn\u2019t \u2018hallucinating.\u2019 It\u2019s bullshitting. Undark https:\/\/undark.org\/2023\/04\/06\/chatgpt-isnt-hallucinating-its-bullshitting (2023)."},{"key":"881_CR10","doi-asserted-by":"crossref","unstructured":"Filippova, K. Controlled hallucinations: learning to generate faithfully from noisy data. In Findings of the Association for Computational Linguistics: EMNLP 2020 864\u2013870 (ACL, 2020).","DOI":"10.18653\/v1\/2020.findings-emnlp.76"},{"key":"881_CR11","doi-asserted-by":"publisher","unstructured":"Sison, A. J. G., Daza, M. T., Gozalo-Brizuela, R. & Garrido-Merch\u00e1n, E. C. ChatGPT: more than a \u2018weapon of mass deception\u2019 \u2013 ethical challenges and responses from the Human-Centered Artificial Intelligence (HCAI) perspective. Int. J. Hum.\u2013Comput. Interact. https:\/\/doi.org\/10.1080\/10447318.2023.2225931 (2023).","DOI":"10.1080\/10447318.2023.2225931"},{"key":"881_CR12","first-page":"45","volume":"12","author":"L Iftikhar","year":"2023","unstructured":"Iftikhar, L. et al. DocGPT: impact of ChatGPT-3 on health services as a virtual doctor. EC Paediatri. 12, 45\u201355 (2023).","journal-title":"EC Paediatri."},{"key":"881_CR13","doi-asserted-by":"publisher","first-page":"e40922","DOI":"10.2196\/40922","volume":"25","author":"H Chin","year":"2023","unstructured":"Chin, H. et al. User-chatbot conversations during the COVID-19 pandemic: study based on topic modeling and sentiment analysis. J. Med. Internet Res. 25, e40922 (2023).","journal-title":"J. Med. Internet Res."},{"key":"881_CR14","doi-asserted-by":"crossref","unstructured":"Peskoff, D. & Stewart, B. Credible without credit: domain experts assess generative language models. In Proc. 61st Annual Meeting of the Association for Computational Linguistics Vol. 2, 427\u2013438 (ACL, 2023).","DOI":"10.18653\/v1\/2023.acl-short.37"},{"key":"881_CR15","doi-asserted-by":"publisher","first-page":"100308","DOI":"10.1016\/j.patter.2021.100308","volume":"2","author":"B Srivastava","year":"2021","unstructured":"Srivastava, B. Did chatbots miss their \u2018Apollo moment\u2019? Potential, gaps, and lessons from using collaboration assistants during COVID-19. Patterns 2, 100308 (2021).","journal-title":"Patterns"},{"key":"881_CR16","unstructured":"Verma, P. & Oremus, W. ChatGPT invented a sexual harassment scandal and named a real law prof as the accused. Washington Post (5 April 2023); https:\/\/www.washingtonpost.com\/technology\/2023\/04\/05\/chatgpt-lies\/"},{"key":"881_CR17","unstructured":"DeVerna, M. R., Yan, H. Y., Yang, K.-C. & Menczer, F. Fact-checking information generated by a large language model can decrease news discernment. Preprint at https:\/\/arxiv.org\/abs\/2308.10800 (2023)."},{"key":"881_CR18","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1145\/3299768","volume":"62","author":"E Ferrara","year":"2019","unstructured":"Ferrara, E. The history of digital spam. Commun. ACM 62, 82\u201391 (2019).","journal-title":"Commun. ACM"},{"key":"881_CR19","unstructured":"Metz, C. Five technologies that will rock your world. New York Times (13 November 2017); https:\/\/www.nytimes.com\/2017\/11\/13\/business\/dealbook\/five-technologies-that-will-rock-your-world.html"},{"key":"881_CR20","unstructured":"Vincent, J. Google\u2019s AI chatbot Bard makes factual error in first demo. 
The Verge (8 February 2023); https:\/\/www.theverge.com\/2023\/2\/8\/23590864\/google-ai-chatbot-bard-mistake-error-exoplanet-demo"},{"key":"881_CR21","unstructured":"Anand, N. Google\u2019s Gemini AI accused of acting too \u2018woke\u2019, company admits mistake. Business Standard (22 February 2024); https:\/\/www.business-standard.com\/companies\/news\/google-s-gemini-ai-accused-of-acting-too-woke-company-admits-mistake-124022200663_1.html"},{"key":"881_CR22","unstructured":"Marcus, G. Deep learning is hitting a wall. Nautilus (10 March 2022); https:\/\/nautil.us\/deep-learning-is-hitting-a-wall-238440\/"},{"key":"881_CR23","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1145\/3616863","volume":"66","author":"S Dutta","year":"2023","unstructured":"Dutta, S. & Chakraborty, T. Thus spake ChatGPT. Commun. ACM 66, 16\u201319 (2023).","journal-title":"Commun. ACM"},{"key":"881_CR24","doi-asserted-by":"publisher","first-page":"678","DOI":"10.1038\/s42256-023-00690-w","volume":"5","author":"F Menczer","year":"2023","unstructured":"Menczer, F., Crandall, D., Ahn, Y.-Y. & Kapadia, A. Addressing the harms of AI-generated inauthentic content. Nat. Mach. Intell. 5, 678\u2013680 (2023).","journal-title":"Nat. Mach. Intell."},{"key":"881_CR25","unstructured":"Patel, A. & Sattler, J. Creatively Malicious Prompt Engineering (WithSecure Labs, 2023)."},{"key":"881_CR26","unstructured":"Vykopal, I. et al. Disinformation capabilities of large language models. Preprint at https:\/\/arxiv.org\/abs\/2311.08838 (2024)."},{"key":"881_CR27","unstructured":"Zhang, H. et al. R-Tuning: teaching large language models to refuse unknown questions. In Proc. 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies Vol. 1 (eds Duh, K. et al.) 7113\u20137139 (ACL, 2024)."},{"key":"881_CR28","unstructured":"Brewster, J., Wang, M. & Palmer, C. Plagiarism-bot? How low-quality websites are using AI to deceptively rewrite content from mainstream news outlets. NewsGuard (24 August 2023); https:\/\/www.newsguardtech.com\/misinformation-monitor\/august-2023\/"},{"key":"881_CR29","doi-asserted-by":"publisher","unstructured":"Yang, K.-C. & Menczer, F. Anatomy of an AI-powered malicious social botnet. J. Quant. Descr. Digit. Media https:\/\/doi.org\/10.51685\/jqd.2024.icwsm.7 (2024).","DOI":"10.51685\/jqd.2024.icwsm.7"},{"key":"881_CR30","unstructured":"Wang, C. et al. Survey on factuality in large language models: knowledge, retrieval and domain-specificity. Preprint at https:\/\/arxiv.org\/abs\/2310.07521 (2023)."},{"key":"881_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3571730","volume":"55","author":"Z Ji","year":"2023","unstructured":"Ji, Z. et al. Survey of hallucination in natural language generation. ACM Comput. Surv. 55, 1\u201338 (2023).","journal-title":"ACM Comput. Surv."},{"key":"881_CR32","unstructured":"Rawte, V., Sheth, A. & Das, A. A survey of hallucination in large foundation models. Preprint at https:\/\/arxiv.org\/abs\/2309.05922 (2023)."},{"key":"881_CR33","unstructured":"Zhang, Y. et al. Siren\u2019s song in the AI ocean: a survey on hallucination in large language models. Preprint at https:\/\/arxiv.org\/abs\/2309.01219 (2023)."},{"key":"881_CR34","doi-asserted-by":"publisher","unstructured":"Ferrara, E. Should ChatGPT be biased? Challenges and risks of bias in large language models. 
First Monday https:\/\/doi.org\/10.5210\/fm.v28i11.13346 (2023).","DOI":"10.5210\/fm.v28i11.13346"},{"key":"881_CR35","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1145\/365153.365168","volume":"9","author":"J Weizenbaum","year":"1966","unstructured":"Weizenbaum, J. ELIZA\u2014a computer program for the study of natural language communication between man and machine. Commun. ACM 9, 36\u201345 (1966).","journal-title":"Commun. ACM"},{"key":"881_CR36","doi-asserted-by":"crossref","unstructured":"Pan, Y. et al. On the risk of misinformation pollution with large language models. In Findings of the Association for Computational Linguistics: EMNLP 2023 1389\u20131403 (ACL, 2023).","DOI":"10.18653\/v1\/2023.findings-emnlp.97"},{"key":"881_CR37","doi-asserted-by":"crossref","unstructured":"Mirsky, Y. & Lee, W. The creation and detection of deepfakes: a survey. ACM Comput. Surv. 54, 7 (2021).","DOI":"10.1145\/3425780"},{"key":"881_CR38","doi-asserted-by":"crossref","unstructured":"Yang, K.-C., Singh, D. & Menczer, F. Characteristics and prevalence of fake social media profiles with AI-generated faces. Preprint at https:\/\/arxiv.org\/abs\/2401.02627 (2024).","DOI":"10.54501\/jots.v2i4.197"},{"key":"881_CR39","doi-asserted-by":"crossref","unstructured":"Liu, N. F., Zhang, T. & Liang, P. Evaluating verifiability in generative search engines. In Findings of the Association for Computational Linguistics: EMNLP 2023 7001\u20137025 (ACL, 2023).","DOI":"10.18653\/v1\/2023.findings-emnlp.467"},{"key":"881_CR40","doi-asserted-by":"publisher","unstructured":"Galitsky, B. A. Truth-O-Meter: collaborating with LLM in fighting its hallucinations. Preprints https:\/\/doi.org\/10.20944\/preprints202307.1723.v1 (2023).","DOI":"10.20944\/preprints202307.1723.v1"},{"key":"881_CR41","doi-asserted-by":"publisher","unstructured":"Ji, Z. et al. Survey of hallucination in natural language generation. ACM Comput. Surv. https:\/\/doi.org\/10.1145\/3571730 (2023).","DOI":"10.1145\/3571730"},{"key":"881_CR42","unstructured":"Vincent, J. AI-generated answers temporarily banned on coding Q&A site Stack Overflow. The Verge (5 December 2022); https:\/\/www.theverge.com\/2022\/12\/5\/23493932\/chatgpt-ai-generated-answers-temporarily-banned-stack-overflow-llms-dangers"},{"key":"881_CR43","doi-asserted-by":"publisher","first-page":"100324","DOI":"10.1016\/j.xops.2023.100324","volume":"3","author":"F Antaki","year":"2023","unstructured":"Antaki, F., Touma, S., Milad, D., El-Khoury, J. & Duval, R. Evaluating the performance of ChatGPT in ophthalmology: an analysis of its successes and shortcomings. Ophthalmol. Sci. 3, 100324 (2023).","journal-title":"Ophthalmol. Sci."},{"key":"881_CR44","unstructured":"Abels, G. Can ChatGPT fact-check? We tested. Poynter (31 May 2023); https:\/\/www.poynter.org\/fact-checking\/2023\/chatgpt-ai-replace-fact-checking\/"},{"key":"881_CR45","doi-asserted-by":"crossref","unstructured":"Fadeeva, E. et al. Fact-checking the output of large language models via token-level uncertainty quantification. In Findings of the Association for Computational Linguistics: ACL 2024 (ACL, 2024).","DOI":"10.18653\/v1\/2024.findings-acl.558"},{"key":"881_CR46","doi-asserted-by":"crossref","unstructured":"Geng, J. et al. A survey of confidence estimation and calibration in large language models. In Proc. 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies Vol. 
1, 6577\u20136595 (ACL, 2024).","DOI":"10.18653\/v1\/2024.naacl-long.366"},{"key":"881_CR47","unstructured":"Wang, Y., Li, H., Han, X., Nakov, P. & Baldwin, T. Do-not-answer: evaluating safeguards in LLMs. In Findings of the Association for Computational Linguistics: EACL 2024 896\u2013911 (ACL, 2024)."},{"key":"881_CR48","doi-asserted-by":"crossref","unstructured":"Xie, Y., Fang, M., Pi, R. & Gong, N. GradSafe: detecting unsafe prompts for LLMs via safety-critical gradient analysis. In Proc. 62nd Annual Meeting of the Association for Computational Linguistics (Long Papers) (2024).","DOI":"10.18653\/v1\/2024.acl-long.30"},{"key":"881_CR49","doi-asserted-by":"publisher","unstructured":"Bai, H., Voelkel, J. G., Eichstaedt, J. C. & Willer, R. Artificial intelligence can persuade humans on political issues. Preprint at https:\/\/doi.org\/10.31219\/osf.io\/stakv (2023).","DOI":"10.31219\/osf.io\/stakv"},{"key":"881_CR50","doi-asserted-by":"publisher","first-page":"499","DOI":"10.1146\/annurev-psych-010419-050807","volume":"71","author":"NM Brashier","year":"2020","unstructured":"Brashier, N. M. & Marsh, E. J. Judging truth. Annu. Rev. Psychol. 71, 499\u2013515 (2020).","journal-title":"Annu. Rev. Psychol."},{"key":"881_CR51","unstructured":"WhatsApp. IFCN fact-checking organizations on WhatsApp. https:\/\/faq.whatsapp.com\/5059120540855664 (2023)."},{"key":"881_CR52","doi-asserted-by":"publisher","first-page":"250","DOI":"10.1037\/0022-3514.35.4.250","volume":"35","author":"RE Nisbett","year":"1977","unstructured":"Nisbett, R. E. & Wilson, T. D. The halo effect: evidence for unconscious alteration of judgments. J. Pers. Soc. Psychol. 35, 250\u2013256 (1977).","journal-title":"J. Pers. Soc. Psychol."},{"key":"881_CR53","doi-asserted-by":"crossref","unstructured":"Guillory, J. E. & Hancock, J. T. in The Psychology of Social Networking Vol. 1, 66\u201377 (De Gruyter Open Poland, 2015).","DOI":"10.1515\/9783110473780-008"},{"key":"881_CR54","unstructured":"Qin, J. et al. Why does new knowledge create messy ripple effects in LLMs? Preprint at https:\/\/arxiv.org\/abs\/2407.12828 (2024)."},{"key":"881_CR55","unstructured":"Zhang, Y. et al. Knowledge overshadowing causes amalgamated hallucination in large language models: analysis and solution. Preprint at https:\/\/arxiv.org\/abs\/2407.08039v1 (2024)."},{"key":"881_CR56","unstructured":"Liu, J. et al. EVEDIT: event-based knowledge editing with deductive editing boundaries. Preprint at https:\/\/arxiv.org\/abs\/2402.11324 (2024)."},{"key":"881_CR57","doi-asserted-by":"publisher","first-page":"558","DOI":"10.1038\/s42256-023-00664-y","volume":"5","author":"T Chakraborty","year":"2023","unstructured":"Chakraborty, T. & Masud, S. Judging the creative prowess of AI. Nat. Mach. Intell. 5, 558 (2023).","journal-title":"Nat. Mach. Intell."},{"key":"881_CR58","unstructured":"Srivastava, A. et al. Beyond the imitation game: quantifying and extrapolating the capabilities of language models. In Transactions on Machine Learning Research (2023)."},{"key":"881_CR59","doi-asserted-by":"crossref","unstructured":"Wang, A. et al. GLUE: a multi-task benchmark and analysis platform for natural language understanding. In Proc. 2018 EMNLP Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP 353\u2013355 (ACL, 2018).","DOI":"10.18653\/v1\/W18-5446"},{"key":"881_CR60","unstructured":"Wang, A. et al. SuperGLUE: a stickier benchmark for general-purpose language understanding systems. In Proc. 
33rd International Conference on Neural Information Processing Systems 3266\u20133280 (Curran Associates Inc., 2019)."},{"key":"881_CR61","doi-asserted-by":"crossref","unstructured":"Lin, S., Hilton, J. & Evans, O. TruthfulQA: measuring how models mimic human falsehoods. In Proc. 60th Annual Meeting of the Association for Computational Linguistics Vol. 1 (eds Muresan, S. et al.) 3214\u20133252 (ACL, 2022).","DOI":"10.18653\/v1\/2022.acl-long.229"},{"key":"881_CR62","unstructured":"Golchin, S. & Surdeanu, M. Time travel in LLMs: tracing data contamination in large language models. In Proc. 12th International Conference on Learning Representations (2024)."},{"key":"881_CR63","doi-asserted-by":"crossref","unstructured":"Fu, J., Ng, S.-K., Jiang, Z. & Liu, P. GPTScore: evaluate as you desire. In Proc. 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies Vol. 1 (eds Duh, K. et al.) 6556\u20136576 (ACL, 2024).","DOI":"10.18653\/v1\/2024.naacl-long.365"},{"key":"881_CR64","doi-asserted-by":"crossref","unstructured":"Liu, Y. et al. G-Eval: NLG evaluation using GPT-4 with better human alignment. In Proc. 2023 Conference on Empirical Methods in Natural Language Processing (eds Bouamor, H. et al.) 2511\u20132522 (ACL, 2023).","DOI":"10.18653\/v1\/2023.emnlp-main.153"},{"key":"881_CR65","doi-asserted-by":"crossref","unstructured":"Manakul, P., Liusie, A. & Gales, M. SelfCheckGPT: zero-resource black-box hallucination detection for generative large language models. In Proc. 2023 Conference on Empirical Methods in Natural Language Processing (eds Bouamor, H. et al.) 9004\u20139017 (ACL, 2023).","DOI":"10.18653\/v1\/2023.emnlp-main.557"},{"key":"881_CR66","unstructured":"Wang, P. et al. Large language models are not fair evaluators. Preprint at https:\/\/arxiv.org\/abs\/2305.17926 (2023)."},{"key":"881_CR67","unstructured":"Coles, C. 11% of data employees paste into ChatGPT is confidential. Cyberhaven https:\/\/www.cyberhaven.com\/blog\/4-2-of-workers-have-pasted-company-data-into-chatgpt (2023)."},{"key":"881_CR68","unstructured":"Meta. Meta\u2019s Third-Party Fact-Checking Program. https:\/\/www.facebook.com\/formedia\/mjp\/programs\/third-party-fact-checking (2016)."},{"key":"881_CR69","doi-asserted-by":"crossref","unstructured":"Truong, B. T., Lou, X., Flammini, A. & Menczer, F. Vulnerabilities of the online public square to manipulation. PNAS Nexus 3, pgae258 (2024).","DOI":"10.1093\/pnasnexus\/pgae258"},{"key":"881_CR70","doi-asserted-by":"publisher","first-page":"102197","DOI":"10.1016\/j.jretconser.2020.102197","volume":"57","author":"S Talwar","year":"2020","unstructured":"Talwar, S., Dhir, A., Singh, D., Virk, G. S. & Salo, J. Sharing of fake news on social media: application of the honeycomb framework and the third-person effect hypothesis. J. Retail. Consum. Serv. 57, 102197 (2020).","journal-title":"J. Retail. Consum. Serv."},{"key":"881_CR71","doi-asserted-by":"publisher","unstructured":"Avram, M., Micallef, N., Patil, S. & Menczer, F. Exposure to social engagement metrics increases vulnerability to misinformation. HKS Misinform. Rev. https:\/\/doi.org\/10.37016\/mr-2020-033 (2020).","DOI":"10.37016\/mr-2020-033"},{"key":"881_CR72","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-022-10070-w","volume":"12","author":"F Pierri","year":"2022","unstructured":"Pierri, F. et al. Online misinformation is linked to early COVID-19 vaccination hesitancy and refusal. Sci. Rep. 12, 5966 (2022).","journal-title":"Sci. 
Rep."},{"key":"881_CR73","unstructured":"Christiano, P. et al. Deep reinforcement learning from human preferences. In Proc. 31st International Conference on Neural Information Processing Systems 4302\u20134310 (Curran Associates Inc., 2017)."},{"key":"881_CR74","unstructured":"Sengupta, N. et al. Jais and Jais-chat: Arabic-centric foundation and instruction-tuned open generative large language models. Preprint at https:\/\/arxiv.org\/abs\/2308.16149 (2023)."},{"key":"881_CR75","unstructured":"Lin, S.-C. et al. FLAME: factuality-aware alignment for large language models. Preprint at https:\/\/arxiv.org\/abs\/2405.01525 (2024)."},{"key":"881_CR76","unstructured":"Lee, N. et al. Factuality enhanced language models for open-ended text generation. In Proc. 36th International Conference on Neural Information Processing Systems 34586\u201334599 (Curran Associates Inc., 2024)."},{"key":"881_CR77","unstructured":"Ians. Hackers exploiting ChatGPT to write malicious codes to steal your data. Business Standard (8 January 2023); https:\/\/www.business-standard.com\/article\/technology\/hackers-exploiting-chatgpt-to-write-malicious-codes-to-steal-your-data-123010800216_1.html"},{"key":"881_CR78","unstructured":"Sunilkumar, S. R. Cybercriminals using ChatGPT AI bot to develop malicious tools? Hindustan Times (16 January 2023); https:\/\/www.hindustantimes.com\/technology\/cybercriminals-using-chatgpt-ai-bot-to-develop-malicious-tools-101673876956902.html"},{"key":"881_CR79","unstructured":"Guu, K., Lee, K., Tung, Z., Pasupat, P. & Chang, M.-W. REALM: retrieval-augmented language model pre-training. In Proc. 37th International Conference on Machine Learning 3929\u20133938 (JMLR, 2020)."},{"key":"881_CR80","unstructured":"Reddy, R. G. et al. SmartBook: AI-assisted situation report generation. Preprint at https:\/\/arxiv.org\/abs\/2303.14337 (2023)."},{"key":"881_CR81","unstructured":"Martineau, K. What is retrieval-augmented generation? IBM Blog https:\/\/research.ibm.com\/blog\/retrieval-augmented-generation-RAG (2023)."},{"key":"881_CR82","unstructured":"Gou, Z. et al. CRITIC: large language models can self-correct with tool-interactive critiquing. In Proc. 12th International Conference on Learning Representations (2024)."},{"key":"881_CR83","doi-asserted-by":"crossref","unstructured":"Cohen, R., Hamri, M., Geva, M. & Globerson, A. LM vs LM: detecting factual errors via cross examination. In Proc. 2023 Conference on Empirical Methods in Natural Language Processing 12621\u201312640 (ACL, 2023).","DOI":"10.18653\/v1\/2023.emnlp-main.778"},{"key":"881_CR84","doi-asserted-by":"crossref","unstructured":"Dziri, N., Madotto, A., Za\u00efane, O. & Bose, A. J. Neural path hunter: reducing hallucination in dialogue systems via path grounding. In Proc. 2021 Conference on Empirical Methods in Natural Language Processing (eds Moens, M.-F. et al.) 2197\u20132214 (ACL, 2021).","DOI":"10.18653\/v1\/2021.emnlp-main.168"},{"key":"881_CR85","doi-asserted-by":"crossref","unstructured":"De Cao, N., Aziz, W. & Titov, I. Editing factual knowledge in language models. In Proc. 2021 Conference on Empirical Methods in Natural Language Processing (eds Moens, M.-F. et al.) 6491\u20136506 (ACL, 2021).","DOI":"10.18653\/v1\/2021.emnlp-main.522"},{"key":"881_CR86","unstructured":"Yu, P. & Ji, H. Self information update for large language models through mitigating exposure bias. Preprint at https:\/\/arxiv.org\/abs\/2305.18582 (2023)."},{"key":"881_CR87","unstructured":"Zhang, T., Kishore, V., Wu, F., Weinberger, K. Q. & Artzi, Y. 
BERTScore: evaluating text generation with BERT. In Proc. 8th International Conference on Learning Representations (2020)."},{"key":"881_CR88","doi-asserted-by":"crossref","unstructured":"Li, J., Cheng, X., Zhao, X., Nie, J. Y. & Wen, J. R. HaluEval: a large-scale hallucination evaluation benchmark for large language models. In Proc. 2023 Conference on Empirical Methods in Natural Language Processing 6449\u20136464 (ACL, 2023).","DOI":"10.18653\/v1\/2023.emnlp-main.397"},{"key":"881_CR89","doi-asserted-by":"crossref","unstructured":"Min, S. et al. FActScore: fine-grained atomic evaluation of factual precision in long form text generation. In Proc. 2023 Conference on Empirical Methods in Natural Language Processing 12076\u201312100 (ACL, 2023).","DOI":"10.18653\/v1\/2023.emnlp-main.741"},{"key":"881_CR90","unstructured":"Cheng, Q. et al. Evaluating hallucinations in Chinese large language models. Preprint at https:\/\/arxiv.org\/abs\/2310.03368 (2023)."},{"key":"881_CR91","unstructured":"Wang, Y. et al. M4: multi-generator, multi-domain, and multi-lingual black-box machine-generated text detection. In Proc. Conference of the European Chapter of the Association for Computational Linguistics 1369\u20131407 (ACL, 2024)."},{"key":"881_CR92","doi-asserted-by":"crossref","unstructured":"Huang, K.-H., McKeown, K., Nakov, P., Choi, Y. & Ji, H. Faking fake news for real fake news detection: propaganda-loaded training data generation. In Proc. 61st Annual Meeting of the Association for Computational Linguistics Vol. 1, 14571\u201314589 (ACL, 2023).","DOI":"10.18653\/v1\/2023.acl-long.815"},{"key":"881_CR93","unstructured":"Su, J., Zhuo, T. Y., Mansurov, J., Wang, D. & Nakov, P. Fake news detectors are biased against texts generated by large language models. Preprint at https:\/\/arxiv.org\/abs\/2309.08674 (2023)."},{"key":"881_CR94","doi-asserted-by":"crossref","unstructured":"Su, J., Cardie, C. & Nakov, P. Adapting fake news detection to the era of large language models. In Findings of the Association for Computational Linguistics: NAACL 2024 1473\u20131490 (ACL, 2024).","DOI":"10.18653\/v1\/2024.findings-naacl.95"},{"key":"881_CR95","unstructured":"Kirchenbauer, J. et al. On the reliability of watermarks for large language models. In Proc. 12th International Conference on Learning Representations (2024)."},{"key":"881_CR96","unstructured":"Groh, M. et al. Human detection of political speech deepfakes across transcripts, audio, and video. Preprint at https:\/\/arxiv.org\/abs\/2202.12883 (2023)."},{"key":"881_CR97","unstructured":"Sadasivan, V. S., Kumar, A., Balasubramanian, S., Wang, W. & Feizi, S. Can AI-generated text be reliably detected? Preprint at https:\/\/arxiv.org\/abs\/2303.11156 (2023)."},{"key":"881_CR98","doi-asserted-by":"crossref","unstructured":"Hussain, S., Neekhara, P., Jere, M., Koushanfar, F. & McAuley, J. Adversarial deepfakes: evaluating vulnerability of deepfake detectors to adversarial examples. In Proc. IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV) 3348\u20133357 (IEEE, 2021).","DOI":"10.1109\/WACV48630.2021.00339"},{"key":"881_CR99","doi-asserted-by":"publisher","unstructured":"Quelle, D. & Bovet, A. The perils and promises of fact-checking with large language models. Front. Artif. Intell. https:\/\/doi.org\/10.3389\/frai.2024.1341697 (2024).","DOI":"10.3389\/frai.2024.1341697"},{"key":"881_CR100","doi-asserted-by":"crossref","unstructured":"Sundriyal, M., Singh, P., Akhtar, M. S., Sengupta, S. & Chakraborty, T. 
DESYR: definition and syntactic representation based claim detection on the web. In Proc. 30th ACM International Conference on Information & Knowledge Management 1764\u20131773 (ACM, 2021).","DOI":"10.1145\/3459637.3482423"},{"key":"881_CR101","doi-asserted-by":"crossref","unstructured":"Sundriyal, M., Chakraborty, T. & Nakov, P. From chaos to clarity: claim normalization to empower fact-checking. In Findings of the Association for Computational Linguistics: EMNLP 2023 6594\u20136609 (ACL, 2023).","DOI":"10.18653\/v1\/2023.findings-emnlp.439"},{"key":"881_CR102","doi-asserted-by":"crossref","unstructured":"Huang, K.-H., Chan, H. P. & Ji, H. Zero-shot faithful factual error correction. In Proc. 61st Annual Meeting of the Association for Computational Linguistics Vol. 1, 5660\u20135676 (ACL, 2023).","DOI":"10.18653\/v1\/2023.acl-long.311"},{"key":"881_CR103","doi-asserted-by":"crossref","unstructured":"Shaar, S., Babulkov, N., Da San Martino, G. & Nakov, P. That is a known lie: detecting previously fact-checked claims. In Proc. 58th Annual Meeting of the Association for Computational Linguistics 3607\u20133618 (ACL, 2020).","DOI":"10.18653\/v1\/2020.acl-main.332"},{"key":"881_CR104","unstructured":"Zhang, B., Ding, D. & Jing, L. How would stance detection techniques evolve after the launch of ChatGPT? Preprint at https:\/\/arxiv.org\/abs\/2212.14548 (2022)."},{"key":"881_CR105","unstructured":"Wang, Y., Wang, M. & Nakov, P. Rethinking STS and NLI in large language models. In Findings of the Association for Computational Linguistics: EACL 2024 965\u2013982 (ACL, 2024)."},{"key":"881_CR106","doi-asserted-by":"publisher","first-page":"101861","DOI":"10.1016\/j.inffus.2023.101861","volume":"99","author":"J Koco\u0144","year":"2023","unstructured":"Koco\u0144, J. et al. ChatGPT: jack of all trades, master of none. Inform. Fusion 99, 101861 (2023).","journal-title":"Inform. Fusion"},{"key":"881_CR107","unstructured":"Shankar, A. Remembering conversations: building chatbots with short and long-term memory on AWS. ITNEXT https:\/\/itnext.io\/remembering-conversations-building-chatbots-with-short-and-long-term-memory-on-aws-c1361c130046 (2023)."},{"key":"881_CR108","doi-asserted-by":"crossref","unstructured":"Baly, R., Karadzhov, G., Alexandrov, D., Glass, J. & Nakov, P. Predicting factuality of reporting and bias of news media sources. In Proc. 2018 Conference on Empirical Methods in Natural Language Processing 3528\u20133539 (ACL, 2018).","DOI":"10.18653\/v1\/D18-1389"},{"key":"881_CR109","unstructured":"Yang, K.-C. & Menczer, F. Large language models can rate news outlet credibility. Preprint at https:\/\/arxiv.org\/abs\/2304.00228 (2023)."},{"key":"881_CR110","doi-asserted-by":"crossref","unstructured":"Panayotov, P., Shukla, U., Sencar, H. T., Nabeel, M. & Nakov, P. GREENER: graph neural networks for news media profiling. In Proc. 2022 Conference on Empirical Methods in Natural Language Processing 7470\u20137480 (ACL, 2022).","DOI":"10.18653\/v1\/2022.emnlp-main.506"},{"key":"881_CR111","unstructured":"Nakov, P. et al. A survey on predicting the factuality and the bias of news media. Preprint at https:\/\/arxiv.org\/abs\/2103.12506 (2021)."},{"key":"881_CR112","unstructured":"Dickson, B. Fact-checking and truth in the age of ChatGPT and LLMs. TechTalks https:\/\/bdtechtalks.com\/2023\/10\/30\/llm-fact-checking-hallucinations\/ (2023)."},{"key":"881_CR113","unstructured":"Chern, I. et al. 
FacTool: factuality detection in generative AI\u2014a tool augmented framework for multi-task and multi-domain scenarios. Preprint at https:\/\/arxiv.org\/abs\/2307.13528 (2023)."},{"key":"881_CR114","unstructured":"Sun, L. et al. TrustLLM: trustworthiness in large language models. In Proc. 41st International Conference on Machine Learning (2024)."},{"key":"881_CR115","unstructured":"Chen, S. et al. FELM: benchmarking factuality evaluation of large language models. In Proc. 37th Conference on Neural Information Processing Systems Datasets and Benchmarks Track 44502\u201344523 (Curran Associates Inc., 2023)."},{"key":"881_CR116","doi-asserted-by":"crossref","unstructured":"Li, S. et al. Open-domain hierarchical event schema induction by incremental prompting and verification. In Proc. 61st Annual Meeting of the Association for Computational Linguistics Vol. 1 (eds Rogers, A. et al.) 5677\u20135697 (ACL, 2023).","DOI":"10.18653\/v1\/2023.acl-long.312"},{"key":"881_CR117","unstructured":"Wang, Y. et al. Factcheck-Bench: fine-grained evaluation benchmark for automatic fact-checkers. Preprint at https:\/\/arxiv.org\/abs\/2311.09000 (2024)."},{"key":"881_CR118","unstructured":"Feng, S. et al. Knowledge card: filling LLMs\u2019 knowledge gaps with plug-in specialized language models. In Proc. 12th International Conference on Learning Representations (2024)."},{"key":"881_CR119","doi-asserted-by":"crossref","unstructured":"Choi, E. C. & Ferrara, E. FACT-GPT: fact-checking augmentation via claim matching with LLMs. In Companion Proceedings of the ACM on Web Conference 883\u2013886 (ACM, 2024).","DOI":"10.1145\/3589335.3651504"},{"key":"881_CR120","doi-asserted-by":"crossref","unstructured":"Bender, E. M., Gebru, T., McMillan-Major, A. & Shmitchell, S. On the dangers of stochastic parrots: can language models be too big? In Proc. 2021 ACM Conference on Fairness, Accountability, and Transparency 610\u2013623 (ACM, 2021).","DOI":"10.1145\/3442188.3445922"},{"key":"881_CR121","unstructured":"Generative Artificial Intelligence in Education Departmental Statement (Department for Education, 2023); https:\/\/www.gov.uk\/government\/publications\/generative-artificial-intelligence-in-education\/generative-artificial-intelligence-ai-in-education"},{"key":"881_CR122","unstructured":"Peng, B. et al. Check your facts and try again: improving large language models with external knowledge and automated feedback. Preprint at https:\/\/arxiv.org\/abs\/2302.12813 (2023)."},{"key":"881_CR123","unstructured":"Shi, C. et al. A thorough examination of decoding methods in the era of LLMs. Preprint at https:\/\/arxiv.org\/abs\/2402.06925 (2024)."},{"key":"881_CR124","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Fang, M., Chen, L., Namazi-Rad, M.-R. & Wang, J. How do large language models capture the ever-changing world knowledge? A review of recent advances. In Proc. 2023 Conference on Empirical Methods in Natural Language Processing (eds Bouamor, H. et al.) 8289\u20138311 (ACL, 2023).","DOI":"10.18653\/v1\/2023.emnlp-main.516"},{"key":"881_CR125","unstructured":"Patterson, D. et al. Carbon emissions and large neural network training. Preprint at https:\/\/arxiv.org\/abs\/2104.10350 (2021)."},{"key":"881_CR126","unstructured":"Bereczki, T. & Liber, \u00c1. AI\u2019s emergent abilities a \u2018double-edged sword\u2019. IAPP https:\/\/iapp.org\/news\/a\/ais-emergent-abilities-a-double-edged-sword (2023)."},{"key":"881_CR127","unstructured":"Lu, S., Bigoulaeva, I., Sachdeva, R., Madabushi, H. T. 
& Gurevych, I. Are emergent abilities in large language models just in-context learning? Preprint at https:\/\/arxiv.org\/abs\/2309.01809 (2023)."},{"key":"881_CR128","unstructured":"Gupta, G., Rastegarpanah, B., Iyer, A., Rubin, J. & Kenthapadi, K. Measuring distributional shifts in text: the advantage of language model-based embeddings. Preprint at https:\/\/arxiv.org\/abs\/2312.02337 (2023)."},{"key":"881_CR129","unstructured":"Brown, T. et al. Language models are few-shot learners. Adv. Neur. Inf. Process. Syst. 33, 1877\u20131901 (2020)."},{"key":"881_CR130","unstructured":"Radford, A. et al. Language models are unsupervised multitask learners. OpenAI https:\/\/cdn.openai.com\/better-language-models\/language_models_are_unsupervised_multitask_learners.pdf (2019)."},{"key":"881_CR131","unstructured":"Touvron, H. et al. LLaMA: open and efficient foundation language models. Preprint at https:\/\/arxiv.org\/abs\/2302.13971 (2023)."},{"key":"881_CR132","unstructured":"Human Genome Editing: Science, Ethics, and Governance (National Academies, 2017)."},{"key":"881_CR133","unstructured":"ChatGPT: OpenAI Reopens the Platform in Italy Guaranteeing More Transparency and More Rights to European Users and Non-users (GPDP, 2023); https:\/\/www.garanteprivacy.it\/home\/docweb\/-\/docweb-display\/docweb\/9881490"},{"key":"881_CR134","unstructured":"Chatbots, deepfakes, and voice clones: AI deception for sale. FTC Business Blog https:\/\/www.ftc.gov\/business-guidance\/blog\/2023\/03\/chatbots-deepfakes-voice-clones-ai-deception-sale (2023)."},{"key":"881_CR135","unstructured":"Cohen, J. Right on track: NVIDIA open-source software helps developers add guardrails to AI chatbots. NVIDIA Blogs https:\/\/blogs.nvidia.com\/blog\/2023\/04\/25\/ai-chatbot-guardrails-nemo (2023)."},{"key":"881_CR136","doi-asserted-by":"publisher","first-page":"e2327647","DOI":"10.1001\/jamanetworkopen.2023.27647","volume":"6","author":"A Chen","year":"2023","unstructured":"Chen, A. & Chen, D. O. Accuracy of chatbots in citing journal articles. JAMA Netw. Open 6, e2327647 (2023).","journal-title":"JAMA Netw. Open"},{"key":"881_CR137","unstructured":"Spataro, J. Introducing Microsoft 365 Copilot \u2013 your copilot for work. Official Microsoft Blog https:\/\/blogs.microsoft.com\/blog\/2023\/03\/16\/introducing-microsoft-365-copilot-your-copilot-for-work (2023)."},{"key":"881_CR138","doi-asserted-by":"crossref","unstructured":"Pacheco, D. et al. Uncovering coordinated networks on social media: methods and case studies. In Proc. 
International AAAI Conference on Web and Social Media 455\u2013466 (AAAI, 2021).","DOI":"10.1609\/icwsm.v15i1.18075"}],"container-title":["Nature Machine Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s42256-024-00881-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-024-00881-z","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-024-00881-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T04:20:31Z","timestamp":1732681231000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s42256-024-00881-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,22]]},"references-count":138,"journal-issue":{"issue":"8","published-online":{"date-parts":[[2024,8]]}},"alternative-id":["881"],"URL":"https:\/\/doi.org\/10.1038\/s42256-024-00881-z","relation":{},"ISSN":["2522-5839"],"issn-type":[{"type":"electronic","value":"2522-5839"}],"subject":[],"published":{"date-parts":[[2024,8,22]]},"assertion":[{"value":"25 October 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 July 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 August 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}