{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,25]],"date-time":"2024-09-25T04:35:35Z","timestamp":1727238935226},"reference-count":47,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2024,6,6]],"date-time":"2024-06-06T00:00:00Z","timestamp":1717632000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Natural Language Processing Journal"],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1016\/j.nlp.2024.100083","type":"journal-article","created":{"date-parts":[[2024,6,9]],"date-time":"2024-06-09T00:13:41Z","timestamp":1717892021000},"page":"100083","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Evaluating LLMs on document-based QA: Exact answer selection and numerical extraction using CogTale dataset"],"prefix":"10.1016","volume":"8","author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-3603-3125","authenticated-orcid":false,"given":"Zafaryab","family":"Rasool","sequence":"first","affiliation":[]},{"given":"Stefanus","family":"Kurniawan","sequence":"additional","affiliation":[]},{"given":"Sherwin","family":"Balugo","sequence":"additional","affiliation":[]},{"given":"Scott","family":"Barnett","sequence":"additional","affiliation":[]},{"given":"Rajesh","family":"Vasa","sequence":"additional","affiliation":[]},{"given":"Courtney","family":"Chesser","sequence":"additional","affiliation":[]},{"given":"Benjamin M.","family":"Hampstead","sequence":"additional","affiliation":[]},{"given":"Sylvie","family":"Belleville","sequence":"additional","affiliation":[]},{"given":"Kon","family":"Mouzakis","sequence":"additional","affiliation":[]},{"given":"Alex","family":"Bahar-Fuchs","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"doi-asserted-by":"crossref","unstructured":"Acharya,\u00a0A., Singh,\u00a0B., Onoe,\u00a0N., 2023. LLM Based Generation of Item-Description for Recommendation System. In: Proceedings of the 17th ACM Conference on Recommender Systems. pp. 1204\u20131207.","key":"10.1016\/j.nlp.2024.100083_b1","DOI":"10.1145\/3604915.3610647"},{"key":"10.1016\/j.nlp.2024.100083_b2","series-title":"International Conference on Machine Learning","first-page":"337","article-title":"Using large language models to simulate multiple humans and replicate human subject studies","author":"Aher","year":"2023"},{"year":"2023","author":"Bai","series-title":"Benchmarking foundation models with language-model-as-an-examiner","key":"10.1016\/j.nlp.2024.100083_b3"},{"issue":"3","key":"10.1016\/j.nlp.2024.100083_b4","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1016\/0167-4943(93)90052-J","article-title":"Effects of reality orientation therapy on elderly patients in the community","volume":"17","author":"Baldelli","year":"1993","journal-title":"Arch. Gerontol. Geriatrics"},{"year":"2023","author":"Bang","series-title":"A multitask, multilingual, multimodal evaluation of ChatGPT on reasoning, hallucination, and interactivity","key":"10.1016\/j.nlp.2024.100083_b5"},{"year":"2024","author":"Barnett","series-title":"Seven failure points when engineering a retrieval augmented generation system","key":"10.1016\/j.nlp.2024.100083_b6"},{"year":"2023","author":"Bian","series-title":"ChatGPT is a knowledgeable but inexperienced solver: An investigation of commonsense problem in large language models","key":"10.1016\/j.nlp.2024.100083_b7"},{"issue":"8","key":"10.1016\/j.nlp.2024.100083_b8","doi-asserted-by":"crossref","first-page":"861","DOI":"10.1191\/0269215505cr911oa","article-title":"Cognitive rehabilitation combined with drug treatment in Alzheimer\u2019s disease patients: A pilot study","volume":"19","author":"Bottino","year":"2005","journal-title":"Clinical Rehabil."},{"key":"10.1016\/j.nlp.2024.100083_b9","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.nlp.2024.100083_b10","doi-asserted-by":"crossref","first-page":"124","DOI":"10.1590\/S1980-57642009DN30200010","article-title":"Cognitive training in older adults with mild cognitive impairment: Impact on cognitive and functional performance","volume":"3","author":"Brum","year":"2009","journal-title":"Dementia Neuropsychol."},{"issue":"4","key":"10.1016\/j.nlp.2024.100083_b11","doi-asserted-by":"crossref","first-page":"617","DOI":"10.1017\/S1041610212002177","article-title":"Benefits of training working memory in amnestic mild cognitive impairment: Specific and transfer effects","volume":"25","author":"Carretti","year":"2013","journal-title":"Int. Psychogeriatr."},{"issue":"3","key":"10.1016\/j.nlp.2024.100083_b12","doi-asserted-by":"crossref","first-page":"e192","DOI":"10.1016\/j.archger.2010.11.004","article-title":"Impact of metacognition and motivation on the efficacy of strategic memory training in older adults: Analysis of specific, transfer and maintenance effects","volume":"52","author":"Carretti","year":"2011","journal-title":"Arch. Gerontol. Geriatrics"},{"issue":"8","key":"10.1016\/j.nlp.2024.100083_b13","first-page":"868","article-title":"Computerized structured cognitive training in patients affected by early-stage Alzheimer\u2019s disease is feasible and effective: A randomized controlled study","volume":"31","author":"Cavallo","year":"2016","journal-title":"Arch. Clin. Neuropsychol."},{"year":"2023","author":"Chang","series-title":"A survey on evaluation of large language models","key":"10.1016\/j.nlp.2024.100083_b14"},{"year":"2021","author":"Dasigi","series-title":"A dataset of information-seeking questions and answers anchored in research papers","key":"10.1016\/j.nlp.2024.100083_b15"},{"key":"10.1016\/j.nlp.2024.100083_b16","article-title":"GPT-3.5, GPT-4, or BARD? Evaluating LLMs reasoning ability in zero-shot setting and performance boosting through prompts","volume":"5","author":"Espejel","year":"2023","journal-title":"Nat. Lang. Process. J."},{"year":"2020","author":"Ferguson","series-title":"IIRC: A dataset of incomplete information reading comprehension questions","key":"10.1016\/j.nlp.2024.100083_b17"},{"issue":"2","key":"10.1016\/j.nlp.2024.100083_b18","doi-asserted-by":"crossref","first-page":"244","DOI":"10.1080\/13825585.2014.915918","article-title":"Repetition-lag training to improve recollection memory in older people with amnestic mild cognitive impairment. A randomized controlled trial","volume":"22","author":"Finn","year":"2015","journal-title":"Aging, Neuropsychol. Cognit."},{"key":"10.1016\/j.nlp.2024.100083_b19","doi-asserted-by":"crossref","first-page":"346","DOI":"10.1162\/tacl_a_00370","article-title":"Did aristotle use a laptop? A question answering benchmark with implicit reasoning strategies","volume":"9","author":"Geva","year":"2021","journal-title":"Trans. Assoc. Comput. Linguist."},{"issue":"3","key":"10.1016\/j.nlp.2024.100083_b20","doi-asserted-by":"crossref","first-page":"933","DOI":"10.1002\/alz.12718","article-title":"Toward rational use of cognitive training in those with mild cognitive impairment","volume":"19","author":"Hampstead","year":"2023","journal-title":"Alzheimer\u2019s Dementia"},{"issue":"1","key":"10.1016\/j.nlp.2024.100083_b21","first-page":"1","article-title":"Efficacy of the ubiquitous spaced retrieval-based memory advancement and rehabilitation training (USMART) program among patients with mild cognitive impairment: A randomized controlled crossover trial","volume":"9","author":"Han","year":"2017","journal-title":"Alzheimer\u2019s Res. Therapy"},{"key":"10.1016\/j.nlp.2024.100083_b22","series-title":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing","first-page":"2567","article-title":"PubMedQA: A dataset for biomedical research question answering","author":"Jin","year":"2019"},{"issue":"3","key":"10.1016\/j.nlp.2024.100083_b23","doi-asserted-by":"crossref","first-page":"535","DOI":"10.1109\/TBDATA.2019.2921572","article-title":"Billion-scale similarity search with GPUs","volume":"7","author":"Johnson","year":"2019","journal-title":"IEEE Trans. Big Data"},{"key":"10.1016\/j.nlp.2024.100083_b24","article-title":"A survey of GPT-3 family large language models including ChatGPT and GPT-4","author":"Kalyan","year":"2023","journal-title":"Nat. Lang. Process. J."},{"year":"2023","author":"Kamalloo","series-title":"Evaluating open-domain question answering in the era of large language models","key":"10.1016\/j.nlp.2024.100083_b25"},{"key":"10.1016\/j.nlp.2024.100083_b26","first-page":"22199","article-title":"Large language models are zero-shot reasoners","volume":"35","author":"Kojima","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"1","key":"10.1016\/j.nlp.2024.100083_b27","doi-asserted-by":"crossref","first-page":"170","DOI":"10.1038\/s41597-023-02068-4","article-title":"BioASQ-QA: A manually curated corpus for biomedical question answering","volume":"10","author":"Krithara","year":"2023","journal-title":"Sci. Data"},{"issue":"2","key":"10.1016\/j.nlp.2024.100083_b28","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1002\/gps.2086","article-title":"Cognitive rehabilitation in patients with mild cognitive impairment","volume":"24","author":"Kurz","year":"2009","journal-title":"Int. J. Geriatric Psychiatry: J. Psychiatry Late Life Allied Sci."},{"issue":"7","key":"10.1016\/j.nlp.2024.100083_b29","doi-asserted-by":"crossref","first-page":"1576","DOI":"10.1080\/09602011.2021.1919529","article-title":"Effectiveness of a visual imagery training program to improve prospective memory in older adults with and without mild cognitive impairment: A randomized controlled study","volume":"32","author":"Lajeunesse","year":"2022","journal-title":"Neuropsychol. Rehabil."},{"key":"10.1016\/j.nlp.2024.100083_b30","article-title":"Applying large language models and chain-of-thought for automatic scoring","author":"Lee","year":"2024","journal-title":"Comput. Educ.: Artif. Intell."},{"key":"10.1016\/j.nlp.2024.100083_b31","series-title":"ICML 2022 Workshop on Knowledge Retrieval and Language Models","article-title":"Huge frozen language models as readers for open-domain question answering","author":"Levine","year":"2022"},{"key":"10.1016\/j.nlp.2024.100083_b32","first-page":"9459","article-title":"Retrieval-augmented generation for knowledge-intensive nlp tasks","volume":"33","author":"Lewis","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"doi-asserted-by":"crossref","unstructured":"Liu,\u00a0C., Li,\u00a0X., Shang,\u00a0L., Jiang,\u00a0X., Liu,\u00a0Q., Lam,\u00a0E., Wong,\u00a0N., 2023. Gradually Excavating External Knowledge for Implicit Complex Question Answering. In: Findings of the Association for Computational Linguistics: EMNLP 2023. pp. 14405\u201314417.","key":"10.1016\/j.nlp.2024.100083_b33","DOI":"10.18653\/v1\/2023.findings-emnlp.961"},{"year":"2023","author":"OpenAI","series-title":"GPT-4 technical report","key":"10.1016\/j.nlp.2024.100083_b34"},{"year":"2014","author":"Paliouras","series-title":"A challenge on large-scale biomedical semantic indexing and question answering","key":"10.1016\/j.nlp.2024.100083_b35"},{"key":"10.1016\/j.nlp.2024.100083_b36","series-title":"European Conference on Information Retrieval","first-page":"534","article-title":"Visconde: Multi-document QA with GPT-3 and neural reranking","author":"Pereira","year":"2023"},{"year":"2023","author":"Qin","series-title":"Is ChatGPT a general-purpose natural language processing task solver?","key":"10.1016\/j.nlp.2024.100083_b37"},{"year":"2023","author":"Ram","series-title":"In-context retrieval-augmented language models","key":"10.1016\/j.nlp.2024.100083_b38"},{"year":"2024","author":"Rasool","series-title":"LLMs for test input generation for semantic caches","key":"10.1016\/j.nlp.2024.100083_b39"},{"issue":"5","key":"10.1016\/j.nlp.2024.100083_b40","doi-asserted-by":"crossref","first-page":"825","DOI":"10.1017\/S1041610213000045","article-title":"Efficacy of a cognitive intervention program in patients with mild cognitive impairment","volume":"25","author":"Rojas","year":"2013","journal-title":"Int. Psychogeriatr."},{"issue":"1","key":"10.1016\/j.nlp.2024.100083_b41","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s13643-021-01787-2","article-title":"CogTale: An online platform for the evaluation, synthesis, and dissemination of evidence from cognitive interventions studies","volume":"10","author":"Sabates","year":"2021","journal-title":"Syst. Rev."},{"year":"2023","author":"Shi","series-title":"REPLUG: Retrieval-augmented black-box language models","key":"10.1016\/j.nlp.2024.100083_b42"},{"year":"2023","author":"Singhal","series-title":"Towards expert-level medical question answering with large language models","key":"10.1016\/j.nlp.2024.100083_b43"},{"year":"2023","author":"Team","series-title":"Gemini: A family of highly capable multimodal models","key":"10.1016\/j.nlp.2024.100083_b44"},{"year":"2023","author":"Touvron","series-title":"Llama 2: Open foundation and fine-tuned chat models","key":"10.1016\/j.nlp.2024.100083_b45"},{"issue":"4","key":"10.1016\/j.nlp.2024.100083_b46","doi-asserted-by":"crossref","first-page":"360","DOI":"10.1016\/j.jagp.2014.04.002","article-title":"The PACE study: A randomized clinical trial of cognitive activity strategy training for older people with mild cognitive impairment","volume":"23","author":"Vidovich","year":"2015","journal-title":"Am. J. Geriatric Psychiatry"},{"year":"2023","author":"Zhao","series-title":"A survey of large language models","key":"10.1016\/j.nlp.2024.100083_b47"}],"container-title":["Natural Language Processing Journal"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S2949719124000311?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S2949719124000311?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,9,24]],"date-time":"2024-09-24T19:17:19Z","timestamp":1727205439000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S2949719124000311"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9]]},"references-count":47,"alternative-id":["S2949719124000311"],"URL":"https:\/\/doi.org\/10.1016\/j.nlp.2024.100083","relation":{},"ISSN":["2949-7191"],"issn-type":[{"type":"print","value":"2949-7191"}],"subject":[],"published":{"date-parts":[[2024,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Evaluating LLMs on document-based QA: Exact answer selection and numerical extraction using CogTale dataset","name":"articletitle","label":"Article Title"},{"value":"Natural Language Processing Journal","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.nlp.2024.100083","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2024 The Authors. Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"100083"}}