{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T22:37:27Z","timestamp":1730327847416,"version":"3.28.0"},"publisher-location":"New York, NY, USA","reference-count":152,"publisher":"ACM","funder":[{"name":"Bundesministerium f\u00fcr Bildung und Forschung","award":["ScaDS.AI"]},{"name":"S\u00e4chsisches Staatsministerium f\u00fcr Wissenschaft und Kunst","award":["ScaDS.AI"]},{"name":"Horizon Europe European Research Council","award":["101070014"]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,10]]},"DOI":"10.1145\/3626772.3657849","type":"proceedings-article","created":{"date-parts":[[2024,7,11]],"date-time":"2024-07-11T16:40:05Z","timestamp":1720716005000},"page":"1916-1929","update-policy":"http:\/\/dx.doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Evaluating Generative Ad Hoc Information Retrieval"],"prefix":"10.1145","author":[{"ORCID":"http:\/\/orcid.org\/0000-0001-5707-3751","authenticated-orcid":false,"given":"Lukas","family":"Gienapp","sequence":"first","affiliation":[{"name":"Leipzig University & ScaDS.AI, Leipzig, Germany"}]},{"ORCID":"http:\/\/orcid.org\/0000-0001-9578-7157","authenticated-orcid":false,"given":"Harrisen","family":"Scells","sequence":"additional","affiliation":[{"name":"Leipzig University, Leipzig, Germany"}]},{"ORCID":"http:\/\/orcid.org\/0000-0001-6803-1223","authenticated-orcid":false,"given":"Niklas","family":"Deckers","sequence":"additional","affiliation":[{"name":"Leipzig University & ScaDS.AI, Leipzig, Germany"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-3797-0559","authenticated-orcid":false,"given":"Janek","family":"Bevendorff","sequence":"additional","affiliation":[{"name":"Leipzig University, Leipzig, Germany"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-0726-5250","authenticated-orcid":false,"given":"Shuai","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Information Technology and Electrical Engineering, The University of Queensland, Brisbane, Australia"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-1617-6508","authenticated-orcid":false,"given":"Johannes","family":"Kiesel","sequence":"additional","affiliation":[{"name":"Bauhaus-Universit\u00e4t Weimar, Weimar, Germany"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-4821-1507","authenticated-orcid":false,"given":"Shahbaz","family":"Syed","sequence":"additional","affiliation":[{"name":"Leipzig University, Leipzig, Germany"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-1003-981X","authenticated-orcid":false,"given":"Maik","family":"Fr\u00f6be","sequence":"additional","affiliation":[{"name":"Friedrich-Schiller-Universit\u00e4t Jena, Jena, Germany"}]},{"ORCID":"http:\/\/orcid.org\/0000-0003-0271-5563","authenticated-orcid":false,"given":"Guido","family":"Zuccon","sequence":"additional","affiliation":[{"name":"School of Information Technology and Electrical Engineering, The University of Queensland, Brisbane, Australia"}]},{"ORCID":"http:\/\/orcid.org\/0000-0001-9033-2217","authenticated-orcid":false,"given":"Benno","family":"Stein","sequence":"additional","affiliation":[{"name":"Bauhaus-Universit\u00e4t Weimar, Weimar, Germany"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-9733-2890","authenticated-orcid":false,"given":"Matthias","family":"Hagen","sequence":"additional","affiliation":[{"name":"Friedrich-Schiller-Universit\u00e4t Jena, Jena, Germany"}]},{"ORCID":"http:\/\/orcid.org\/0000-0003-2451-0665","authenticated-orcid":false,"given":"Martin","family":"Potthast","sequence":"additional","affiliation":[{"name":"University of Kassel & hessian.AI, Kassel, Germany"}]}],"member":"320","published-online":{"date-parts":[[2024,7,11]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"92","article-title":"Evaluation Methodologies in Information Retrieval (Dagstuhl Seminar 13441)","volume":"3","author":"Agosti Maristella","year":"2014","unstructured":"Maristella Agosti, Norbert Fuhr, Elaine Toms, and Pertti Vakkari. 2014. Evaluation Methodologies in Information Retrieval (Dagstuhl Seminar 13441). Dagstuhl Reports, Vol. 3, 10 (2014), 92--126.","journal-title":"Dagstuhl Reports"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591960"},{"key":"e_1_3_2_1_3_1","first-page":"4","volume-title":"McFarlane","author":"Alkaissi Hussam","year":"2023","unstructured":"Hussam Alkaissi and Samy I. McFarlane. 2023. Artificial Hallucinations in ChatGPT: Implications in Scientific Writing. Cureus, Vol. 15, 2 (2023), bibinfonumpages4 pages."},{"key":"e_1_3_2_1_4_1","volume-title":"ECIR 2024, Glasgow, UK, March 24--28, 2024, Proceedings, Part II (Lecture Notes in Computer Science","volume":"414","author":"Arabzadeh Negar","unstructured":"Negar Arabzadeh, Amin Bigdeli, and Charles L. A. Clarke. 2024. Adapting Standard Retrieval Benchmarks to Evaluate Generated Answers. In Advances in Information Retrieval - 46th European Conference on Information Retrieval, ECIR 2024, Glasgow, UK, March 24--28, 2024, Proceedings, Part II (Lecture Notes in Computer Science, Vol. 14609), Nazli Goharian, Nicola Tonellotto, Yulan He, Aldo Lipani, Graham McDonald, Craig Macdonald, and Iadh Ounis (Eds.). Springer, 399--414."},{"key":"e_1_3_2_1_5_1","volume-title":"Clarke","author":"Arabzadeh Negar","year":"2024","unstructured":"Negar Arabzadeh and Charles L. A. Clarke. 2024. A Comparison of Methods for Evaluating Generative IR. arXiv 2404.04044."},{"key":"e_1_3_2_1_6_1","volume-title":"Nagarajan Natarajan, Gaurav Sinha, and Amit Sharma.","author":"Arora Daman","year":"2023","unstructured":"Daman Arora, Anush Kini, Sayak Ray Chowdhury, Nagarajan Natarajan, Gaurav Sinha, and Amit Sharma. 2023. GAR-meets-RAG Paradigm for Zero-Shot Information Retrieval. arXiv 2310.20158."},{"key":"e_1_3_2_1_7_1","unstructured":"AutoGPT Contributors. 2023. AutoGPT: The Heart of the Open-Source Agent Ecosystem. https:\/\/github.com\/Significant-Gravitas\/AutoGPT."},{"key":"e_1_3_2_1_8_1","volume-title":"Proceedings of the Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization@ACL 2005","author":"Banerjee Satanjeev","year":"2005","unstructured":"Satanjeev Banerjee and Alon Lavie. 2005. METEOR: An Automatic Metric for MT Evaluation with Improved Correlation with Human Judgments. In Proceedings of the Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization@ACL 2005, Ann Arbor, Michigan, USA, June 29, 2005, Jade Goldstein, Alon Lavie, Chin-Yew Lin, and Clare R. Voss (Eds.). Association for Computational Linguistics, 65--72."},{"key":"e_1_3_2_1_9_1","unstructured":"Christine Bauer Ben Carterette Nicola Ferro and Norbert Fuhr. 2023. Report from Dagstuhl Seminar 23031: Frontiers of Information Access Experimentation for Research and Education. arXiv 2305.01509."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591923"},{"key":"e_1_3_2_1_11_1","volume-title":"Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022","author":"Bevilacqua Michele","year":"2022","unstructured":"Michele Bevilacqua, Giuseppe Ottaviano, Patrick S. H. Lewis, Scott Yih, Sebastian Riedel, and Fabio Petroni. 2022. Autoregressive Search Engines: Generating Substrings as Document Identifiers. In Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 -- December 9, 2022. bibinfonumpages16 pages."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.751"},{"key":"e_1_3_2_1_13_1","first-page":"17","article-title":"Improving Language Models by Retrieving from Trillions of Tokens. In International Conference on Machine Learning","volume":"2022","author":"Borgeaud Sebastian","year":"2022","unstructured":"Sebastian Borgeaud, Arthur Mensch, Jordan Hoffmann, Trevor Cai, Eliza Rutherford, Katie Millican, George van den Driessche, Jean-Baptiste Lespiau, Bogdan Damoc, Aidan Clark, Diego de Las Casas, Aurelia Guy, Jacob Menick, Roman Ring, Tom Hennigan, Saffron Huang, Loren Maggiore, Chris Jones, Albin Cassirer, Andy Brock, Michela Paganini, Geoffrey Irving, Oriol Vinyals, Simon Osindero, Karen Simonyan, Jack W. Rae, Erich Elsen, and Laurent Sifre. 2022. Improving Language Models by Retrieving from Trillions of Tokens. In International Conference on Machine Learning, ICML 2022, 17--23 July 2022, Baltimore, Maryland, USA (Proceedings of Machine Learning Research, Vol. 162), Kamalika Chaudhuri, Stefanie Jegelka, Le Song, Csaba Szepesv\u00e1 ri, Gang Niu, and Sivan Sabato (Eds.). PMLR, 2206--2240.","journal-title":"ICML"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/792550.792552"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2070719.2070722"},{"key":"e_1_3_2_1_16_1","volume-title":"ECIR 2022, Stavanger, Norway, April 10--14, 2022, Proceedings, Part I (Lecture Notes in Computer Science","volume":"156","author":"Arthur C\u00e2","year":"2022","unstructured":"Arthur C\u00e2 mara, David Maxwell, and Claudia Hauff. 2022. Searching, Learning, and Subtopic Ordering: A Simulation-Based Analysis. In Advances in Information Retrieval - 44th European Conference on IR Research, ECIR 2022, Stavanger, Norway, April 10--14, 2022, Proceedings, Part I (Lecture Notes in Computer Science, Vol. 13185), Matthias Hagen, Suzan Verberne, Craig Macdonald, Christin Seifert, Krisztian Balog, Kjetil N\u00f8rv\u00e5g, and Vinay Setty (Eds.). Springer, 142--156."},{"key":"e_1_3_2_1_17_1","volume-title":"Quantifying Human-Perceived Answer Utility in Non-factoid Question Answering. In CHIIR '21: ACM SIGIR Conference on Human Information Interaction and Retrieval","author":"Cambazoglu Berkant Barla","year":"2021","unstructured":"Berkant Barla Cambazoglu, Valeria Bolotova-Baranova, Falk Scholer, Mark Sanderson, Leila Tavakoli, and W. Bruce Croft. 2021. Quantifying Human-Perceived Answer Utility in Non-factoid Question Answering. In CHIIR '21: ACM SIGIR Conference on Human Information Interaction and Retrieval, Canberra, ACT, Australia, March 14--19, 2021, Falk Scholer, Paul Thomas, David Elsweiler, Hideo Joho, Noriko Kando, and Catherine Smith (Eds.). ACM, 75--84."},{"key":"e_1_3_2_1_18_1","unstructured":"Robert Capra and Jaime Arguello. 2023. How does AI Chat Change Search Behaviors? arXiv 2307.03826."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2009916.2010037"},{"key":"e_1_3_2_1_20_1","volume-title":"GERE: Generative Evidence Retrieval for Fact Verification. In SIGIR '22: The 45th International ACM SIGIR Conference on Research and Development in Information Retrieval","author":"Chen Jiangui","year":"2022","unstructured":"Jiangui Chen, Ruqing Zhang, Jiafeng Guo, Yixing Fan, and Xueqi Cheng. 2022. GERE: Generative Evidence Retrieval for Fact Verification. In SIGIR '22: The 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, Madrid, Spain, July 11 -- 15, 2022, Enrique Amig\u00f3, Pablo Castells, Julio Gonzalo, Ben Carterette, J. Shane Culpepper, and Gabriella Kazai (Eds.). ACM, 2184--2189."},{"key":"e_1_3_2_1_21_1","unstructured":"Tong Chen Hongwei Wang Sihao Chen Wenhao Yu Kaixin Ma Xinran Zhao Hongming Zhang and Dong Yu. 2023. Dense X Retrieval: What Retrieval Granularity Should We Use? arXiv 2312.06648."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390334.1390446"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1108\/eb050097"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.6028\/NIST.SP.1266.deep-overview"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.6028\/NIST.SP.500-335.deep-overview"},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings of the Thirty-First Text REtrieval Conference, TREC 2022","author":"Craswell Nick","year":"2022","unstructured":"Nick Craswell, Bhaskar Mitra, Emine Yilmaz, Daniel Campos, Jimmy Lin, Ellen M. Voorhees, and Ian Soboroff. 2022. Overview of the TREC 2022 Deep Learning Track. In Proceedings of the Thirty-First Text REtrieval Conference, TREC 2022, online, November 15--19, 2022 (NIST Special Publication, Vol. 500--338), Ian Soboroff and Angela Ellis (Eds.). National Institute of Standards and Technology (NIST), bibinfonumpages21 pages."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3274784.3274788"},{"key":"e_1_3_2_1_28_1","volume-title":"Overview of DUC 2005. In DUC 2005, Document Understanding Workshop October 9--10","author":"Dang Hoa Trang","year":"2005","unstructured":"Hoa Trang Dang. 2005. Overview of DUC 2005. In DUC 2005, Document Understanding Workshop October 9--10, 2005, Vancouver, B.C., Canada. 1--12."},{"key":"e_1_3_2_1_29_1","volume-title":"ACL 2007, Proceedings of the 45th Annual Meeting of the Association for Computational Linguistics, June 23--30","author":"Dang Hoa Trang","year":"2007","unstructured":"Hoa Trang Dang and Jimmy Lin. 2007. Different Structures for Evaluating Answers to Complex Questions: Pyramids Won't Topple, and Neither Will Human Assessors. In ACL 2007, Proceedings of the 45th Annual Meeting of the Association for Computational Linguistics, June 23--30, 2007, Prague, Czech Republic, John Carroll, Antal van den Bosch, and Annie Zaenen (Eds.). The Association for Computational Linguistics, 768--775."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.6028\/NIST.SP.500-272.qa-overview"},{"key":"e_1_3_2_1_31_1","first-page":"20","volume-title":"Autoregressive Entity Retrieval. In 9th International Conference on Learning Representations, ICLR 2021","author":"Cao Nicola De","year":"2021","unstructured":"Nicola De Cao, Gautier Izacard, Sebastian Riedel, and Fabio Petroni. 2021. Autoregressive Entity Retrieval. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3--7, 2021. OpenReview.net, bibinfonumpages20 pages."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3576840.3578327"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.3115\/1289189.1289273"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.3115\/1626394.1626425"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00373"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3578337.3605136"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Thibault Formal Carlos Lassance Benjamin Piwowarski and St\u00e9phane Clinchant. 2021. SPLADE v2: Sparse Lexical and Expansion Model for Information Retrieval. arXiv 2109.10086.","DOI":"10.1145\/3404835.3463098"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.3389\/fnsys.2013.00039"},{"key":"e_1_3_2_1_39_1","volume-title":"ECIR 2023, Dublin, Ireland, April 2--6, 2023, Proceedings, Part I (Lecture Notes in Computer Science","volume":"329","author":"Maik Fr\u00f6","year":"2023","unstructured":"Maik Fr\u00f6 be, Lukas Gienapp, Martin Potthast, and Matthias Hagen. 2023. Bootstrapped nDCG Estimation in the Presence of Unjudged Documents. In Advances in Information Retrieval - 45th European Conference on Information Retrieval, ECIR 2023, Dublin, Ireland, April 2--6, 2023, Proceedings, Part I (Lecture Notes in Computer Science, Vol. 13980), Jaap Kamps, Lorraine Goeuriot, Fabio Crestani, Maria Maistro, Hideo Joho, Brian Davis, Cathal Gurrin, Udo Kruschwitz, and Annalina Caputo (Eds.). Springer, 313--329."},{"key":"e_1_3_2_1_40_1","unstructured":"Jinlan Fu See-Kiong Ng Zhengbao Jiang and Pengfei Liu. 2023. GPTScore: Evaluate as You Desire. arXiv 2302.04166."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.295"},{"key":"e_1_3_2_1_42_1","unstructured":"Yunfan Gao Yun Xiong Xinyu Gao Kangxiang Jia Jinliu Pan Yuxi Bi Yi Dai Jiawei Sun Qianyu Guo Meng Wang and Haofen Wang. 2023. Retrieval-Augmented Generation for Large Language Models: A Survey. arXiv 2312.10997."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.511"},{"key":"e_1_3_2_1_44_1","volume-title":"ECIR 2023, Dublin, Ireland, April 2--6, 2023, Proceedings, Part II (Lecture Notes in Computer Science","volume":"422","author":"Gospodinov Mitko","year":"2023","unstructured":"Mitko Gospodinov, Sean MacAvaney, and Craig Macdonald. 2023. mboxDoc2Query-: When Less is More. In Advances in Information Retrieval - 45th European Conference on Information Retrieval, ECIR 2023, Dublin, Ireland, April 2--6, 2023, Proceedings, Part II (Lecture Notes in Computer Science, Vol. 13981), Jaap Kamps, Lorraine Goeuriot, Fabio Crestani, Maria Maistro, Hideo Joho, Brian Davis, Cathal Gurrin, Udo Kruschwitz, and Annalina Caputo (Eds.). Springer, 414--422."},{"key":"e_1_3_2_1_45_1","volume-title":"REALM: Retrieval-Augmented Language Model Pre-Training. arXiv","author":"Guu Kelvin","year":"2020","unstructured":"Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat, and Ming-Wei Chang. 2020. REALM: Retrieval-Augmented Language Model Pre-Training. arXiv 2002.08909."},{"key":"e_1_3_2_1_46_1","volume-title":"Alessandro Moschitti, Preslav Nakov, and Massimo Nicosia.","author":"Francisco Guzm\u00e1","year":"2014","unstructured":"Francisco Guzm\u00e1 n, Shafiq R. Joty, Llu'i s M\u00e0 rquez, Alessandro Moschitti, Preslav Nakov, and Massimo Nicosia. 2014. Learning to Differentiate Better from Worse Translations. In Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing, EMNLP 2014, October 25--29, 2014, Doha, Qatar, A meeting of SIGDAT, a Special Interest Group of the ACL, Alessandro Moschitti, Bo Pang, and Walter Daelemans (Eds.). ACL, 214--220."},{"key":"e_1_3_2_1_47_1","volume-title":"Llu'i s M\u00e0 rquez, and Preslav Nakov","author":"Francisco Guzm\u00e1","year":"2015","unstructured":"Francisco Guzm\u00e1 n, Shafiq R. Joty, Llu'i s M\u00e0 rquez, and Preslav Nakov. 2015. Pairwise Neural Machine Translation Evaluation. In Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing of the Asian Federation of Natural Language Processing, ACL 2015, July 26--31, 2015, Beijing, China, Volume 1: Long Papers. The Association for Computer Linguistics, 805--814."},{"key":"e_1_3_2_1_48_1","volume-title":"Characterizing Relevance with Eye-Tracking Measures. In Fifth Information Interaction in Context Symposium, IIiX '14","author":"Gwizdka Jacek","year":"2014","unstructured":"Jacek Gwizdka. 2014. Characterizing Relevance with Eye-Tracking Measures. In Fifth Information Interaction in Context Symposium, IIiX '14, Regensburg, Germany, August 26--29, 2014, David Elsweiler, Bernd Ludwig, Leif Azzopardi, and Max L. Wilson (Eds.). ACM, 58--67."},{"volume-title":"Search User Interfaces","author":"Hearst Marti A.","key":"e_1_3_2_1_49_1","unstructured":"Marti A. Hearst. 2009. Search User Interfaces. Cambridge University Press."},{"key":"e_1_3_2_1_50_1","unstructured":"Yi-Chong Huang Xia-Chong Feng Xiao-Cheng Feng and Bing Qin. 2021. The Factual Inconsistency Problem in Abstractive Text Summarization: A Survey. arXiv 2104.14839."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.74"},{"key":"e_1_3_2_1_52_1","volume-title":"ACM Trans. Inf. Syst.","volume":"20","author":"Jaana Kalervo","year":"2002","unstructured":"Kalervo J\"a rvelin and Jaana Kek\"a l\"a inen. 2002. Cumulated Gain-Based Evaluation of IR Techniques. ACM Trans. Inf. Syst. , Vol. 20, 4 (2002), 422--446."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3571730"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.149"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.495"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.456"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000012"},{"key":"e_1_3_2_1_58_1","volume-title":"David Hall, Percy Liang, Christopher Potts, and Matei Zaharia.","author":"Khattab Omar","year":"2022","unstructured":"Omar Khattab, Keshav Santhanam, Xiang Lisa Li, David Hall, Percy Liang, Christopher Potts, and Matei Zaharia. 2022. Demonstrate-Search-Predict: Composing Retrieval and Language Models for Knowledge-Intensive NLP. arXiv 2212.14024."},{"key":"e_1_3_2_1_59_1","first-page":"69","volume-title":"Decomposed Prompting: A Modular Approach for Solving Complex Tasks. In The Eleventh International Conference on Learning Representations, ICLR 2023","author":"Khot Tushar","year":"2023","unstructured":"Tushar Khot, Harsh Trivedi, Matthew Finlayson, Yao Fu, Kyle Richardson, Peter Clark, and Ashish Sabharwal. 2023. Decomposed Prompting: A Modular Approach for Solving Complex Tasks. In The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1--5, 2023. OpenReview.net, bibinfonumpages69 pages."},{"key":"e_1_3_2_1_60_1","unstructured":"Emre Kiciman Robert Ness Amit Sharma and Chenhao Tan. 2023. Causal Reasoning and Large Language Models: Opening a New Frontier for Causality. arXiv 2305.00050."},{"key":"e_1_3_2_1_61_1","volume-title":"ECIR 2021, Virtual Event, March 28 -- April 1, 2021, Proceedings, Part II (Lecture Notes in Computer Science","volume":"74","author":"Kiesel Johannes","year":"2021","unstructured":"Johannes Kiesel, Lars Meyer, Florian Kneist, Benno Stein, and Martin Potthast. 2021. An Empirical Comparison of Web Page Segmentation Algorithms. In Advances in Information Retrieval - 43rd European Conference on IR Research, ECIR 2021, Virtual Event, March 28 -- April 1, 2021, Proceedings, Part II (Lecture Notes in Computer Science, Vol. 12657), Djoerd Hiemstra, Marie-Francine Moens, Josiane Mothe, Raffaele Perego, Martin Potthast, and Fabrizio Sebastiani (Eds.). Springer, 62--74."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00799-023-00369-y"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1"},{"volume-title":"Proceedings of the 10th Conference on Theoretical and Methodological Issues in Machine Translation of Natural Languages. bibinfonumpages10 pages.","author":"Kulesza Alex","key":"e_1_3_2_1_64_1","unstructured":"Alex Kulesza and Stuart M. Shieber. 2004. A Learning Approach to Improving Sentence-Level MT Evaluation. In Proceedings of the 10th Conference on Theoretical and Methodological Issues in Machine Translation of Natural Languages. bibinfonumpages10 pages."},{"key":"e_1_3_2_1_65_1","unstructured":"Angeliki Lazaridou Elena Gribovskaya Wojciech Stokowiec and Nikolai Grigorev. 2022. Internet-Augmented Language Models Through Few-Shot Prompting for Open-Domain Question Answering. arXiv 2203.05115."},{"key":"e_1_3_2_1_66_1","first-page":"16","volume-title":"Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. In Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020","author":"Lewis Patrick S. H.","year":"2020","unstructured":"Patrick S. H. Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fc ttler, Mike Lewis, Wen-tau Yih, Tim Rockt\"a schel, Sebastian Riedel, and Douwe Kiela. 2020. Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. In Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6--12, 2020, virtual, Hugo Larochelle, Marc'Aurelio Ranzato, Raia Hadsell, Maria-Florina Balcan, and Hsuan-Tien Lin (Eds.). bibinfonumpages16 pages."},{"key":"e_1_3_2_1_67_1","volume-title":"Proceedings of Machine Translation Summit XIV: Papers, MTSummit 2013","author":"Li Maoxi","year":"2013","unstructured":"Maoxi Li, Aiwen Jiang, and Mingwen Wang. 2013. Listwise Approach to Learning to Rank for Automatic Evaluation of Machine Translation. In Proceedings of Machine Translation Summit XIV: Papers, MTSummit 2013, Nice, France, September 2--6, 2013, Andy Way, Khalil Sima'an, and Mikel L. Forcada (Eds.). bibinfonumpages8 pages."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3271764"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331205"},{"key":"e_1_3_2_1_70_1","volume-title":"ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out. Association for Computational Linguistics, 74--81."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000073"},{"key":"e_1_3_2_1_72_1","volume-title":"Evaluating Verifiability in Generative Search Engines. In Findings of the Association for Computational Linguistics: EMNLP 2023","author":"Liu Nelson F.","year":"2023","unstructured":"Nelson F. Liu, Tianyi Zhang, and Percy Liang. 2023 b. Evaluating Verifiability in Generative Search Engines. In Findings of the Association for Computational Linguistics: EMNLP 2023, Singapore, December 6--10, 2023, Houda Bouamor, Juan Pino, and Kalika Bali (Eds.). Association for Computational Linguistics, 7001--7025."},{"key":"e_1_3_2_1_73_1","volume-title":"Design Guidelines for Prompt Engineering Text-to-Image Generative Models. In CHI '22: CHI Conference on Human Factors in Computing Systems","author":"Liu Vivian","year":"2022","unstructured":"Vivian Liu and Lydia B. Chilton. 2022. Design Guidelines for Prompt Engineering Text-to-Image Generative Models. In CHI '22: CHI Conference on Human Factors in Computing Systems, New Orleans, LA, USA, 29 April 2022 -- 5 May 2022, Simone D. J. Barbosa, Cliff Lampe, Caroline Appert, David A. Shamma, Steven Mark Drucker, Julie R. Williamson, and Koji Yatani (Eds.). ACM, 384:1--384:23."},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.228"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.eval4nlp-1.1"},{"key":"e_1_3_2_1_76_1","unstructured":"Xueguang Ma Xinyu Zhang Ronak Pradeep and Jimmy Lin. 2023. Zero-Shot Listwise Document Reranking with a Large Language Model. arXiv 2305.02156."},{"key":"e_1_3_2_1_77_1","unstructured":"Sean MacAvaney Craig Macdonald Roderick Murray-Smith and Iadh Ounis. 2021. IntenT5: Search Result Diversification using Causal Language Models. arXiv 2108.04026."},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401262"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1145\/3002172"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1145\/2983323.2983805"},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.1145\/2806416.2806476"},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080824"},{"key":"e_1_3_2_1_83_1","volume-title":"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, ACL 2020, Online, July 5--10, 2020","author":"Maynez Joshua","year":"1906","unstructured":"Joshua Maynez, Shashi Narayan, Bernd Bohnet, and Ryan T. McDonald. 2020. On Faithfulness and Factuality in Abstractive Summarization. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, ACL 2020, Online, July 5--10, 2020, Dan Jurafsky, Joyce Chai, Natalie Schluter, and Joel R. Tetreault (Eds.). Association for Computational Linguistics, 1906--1919."},{"key":"e_1_3_2_1_84_1","volume-title":"Christoforos Nalmpantis, Ramakanth Pasunuru, Roberta Raileanu, Baptiste Rozi\u00e8 re, Timo Schick, Jane Dwivedi-Yu, Asli Celikyilmaz, Edouard Grave, Yann LeCun, and Thomas Scialom.","author":"Mialon Gr\u00e9","year":"2023","unstructured":"Gr\u00e9 goire Mialon, Roberto Dess`i, Maria Lomeli, Christoforos Nalmpantis, Ramakanth Pasunuru, Roberta Raileanu, Baptiste Rozi\u00e8 re, Timo Schick, Jane Dwivedi-Yu, Asli Celikyilmaz, Edouard Grave, Yann LeCun, and Thomas Scialom. 2023. Augmented Language Models: A Survey. arXiv 2302.07842."},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-2014"},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1145\/2838931.2838938"},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.1145\/3052768"},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"publisher","DOI":"10.1145\/2505515.2507665"},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"crossref","unstructured":"Preslav Nakov Giovanni Da San Martino Tamer Elsayed Alberto Barr\u00f3 n-Cede n o Rub\u00e9 n M'i guez Shaden Shaar Firoj Alam Fatima Haouari Maram Hasanain Watheq Mansour Bayan Hamdan Zien Sheikh Ali Nikolay Babulkov Alex Nikolov Gautam Kishore Shahi Julia Maria Stru\u00df Thomas Mandl M\u00fc cahid Kutlu and Yavuz Selim Kartal. 2021. Overview of the CLEF-2021 CheckThat! Lab on Detecting Check-Worthy Claims Previously Fact-Checked Claims and Fake News. In Experimental IR Meets Multilinguality Multimodality and Interaction - 12th International Conference of the CLEF Association CLEF 2021 Virtual Event September 21--24 2021 Proceedings (Lecture Notes in Computer Science Vol. 12880) K. Selcc uk Candan Bogdan Ionescu Lorraine Goeuriot Birger Larsen Henning M\u00fc ller Alexis Joly Maria Maistro Florina Piroi Guglielmo Faggioli and Nicola Ferro (Eds.). Springer 264--291.","DOI":"10.1007\/978-3-030-85251-1_19"},{"key":"e_1_3_2_1_90_1","first-page":"19","volume-title":"7th International Conference on Learning Representations, ICLR 2019","author":"Nalisnick Eric T.","year":"2019","unstructured":"Eric T. Nalisnick, Akihiro Matsukawa, Yee Whye Teh, Dilan G\u00f6 r\u00fc r, and Balaji Lakshminarayanan. 2019. Do Deep Generative Models Know What They Don't Know?. In 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6--9, 2019. OpenReview.net, bibinfonumpages19 pages."},{"key":"e_1_3_2_1_91_1","doi-asserted-by":"publisher","DOI":"10.1145\/1233912.1233913"},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1315"},{"key":"e_1_3_2_1_93_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.63"},{"key":"e_1_3_2_1_94_1","volume-title":"Document Expansion by Query Prediction. arXiv","author":"Nogueira Rodrigo Frassetto","year":"1904","unstructured":"Rodrigo Frassetto Nogueira, Wei Yang, Jimmy Lin, and Kyunghyun Cho. 2019. Document Expansion by Query Prediction. arXiv 1904.08375."},{"key":"e_1_3_2_1_95_1","volume-title":"Proceedings of the Seventh AAAI Conference on Human Computation and Crowdsourcing, HCOMP 2019","author":"Nourani Mahsan","year":"2019","unstructured":"Mahsan Nourani, Samia Kabir, Sina Mohseni, and Eric D. Ragan. 2019. The Effects of Meaningful and Meaningless Explanations on Trust and Perceived System Accuracy in Intelligent Systems. In Proceedings of the Seventh AAAI Conference on Human Computation and Crowdsourcing, HCOMP 2019, Stevenson, WA, USA, October 28--30, 2019, Edith Law and Jennifer Wortman Vaughan (Eds.). AAAI Press, 97--105."},{"key":"e_1_3_2_1_96_1","volume-title":"Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics, July 6--12","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. BLEU: A Method for Automatic Evaluation of Machine Translation. In Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics, July 6--12, 2002, Philadelphia, PA, USA. ACL, 311--318."},{"key":"e_1_3_2_1_97_1","volume-title":"Sequencing Matters: A Generate-Retrieve-Generate Model for Building Conversational Agents. arXiv 2311.09513.","author":"Patwardhan Quinn","year":"2023","unstructured":"Quinn Patwardhan and Grace Hui Yang. 2023. Sequencing Matters: A Generate-Retrieve-Generate Model for Building Conversational Agents. arXiv 2311.09513."},{"key":"e_1_3_2_1_98_1","doi-asserted-by":"publisher","DOI":"10.1080\/1461670032000136596"},{"key":"e_1_3_2_1_99_1","doi-asserted-by":"crossref","unstructured":"Zhen Qin Rolf Jagerman Kai Hui Honglei Zhuang Junru Wu Jiaming Shen Tianqi Liu Jialu Liu Donald Metzler Xuanhui Wang and Michael Bendersky. 2023. Large Language Models are Effective Text Rankers with Pairwise Ranking Prompting. arXiv 2306.17563.","DOI":"10.18653\/v1\/2024.findings-naacl.97"},{"key":"e_1_3_2_1_100_1","first-page":"469","article-title":"Generating Natural Language Summaries from Multiple On-Line","volume":"24","author":"Radev Dragomir R.","year":"1998","unstructured":"Dragomir R. Radev and Kathleen R. McKeown. 1998. Generating Natural Language Summaries from Multiple On-Line Sources. Comput. Linguistics, Vol. 24, 3 (1998), 469--500.","journal-title":"Sources. Comput. Linguistics"},{"key":"e_1_3_2_1_101_1","doi-asserted-by":"publisher","DOI":"10.1145\/3020165.3020183"},{"key":"e_1_3_2_1_102_1","doi-asserted-by":"crossref","unstructured":"Ori Ram Yoav Levine Itay Dalmedigos Dor Muhlgay Amnon Shashua Kevin Leyton-Brown and Yoav Shoham. 2023. In-Context Retrieval-Augmented Language Models. arXiv 2302.00083.","DOI":"10.1162\/tacl_a_00605"},{"key":"e_1_3_2_1_103_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591890"},{"key":"e_1_3_2_1_104_1","doi-asserted-by":"publisher","DOI":"10.1145\/2362724.2362776"},{"key":"e_1_3_2_1_105_1","volume-title":"Prompt Programming for Large Language Models: Beyond the Few-Shot Paradigm. In CHI '21: CHI Conference on Human Factors in Computing Systems, Virtual Event \/ Yokohama Japan, May 8--13","author":"Reynolds Laria","year":"2021","unstructured":"Laria Reynolds and Kyle McDonell. 2021. Prompt Programming for Large Language Models: Beyond the Few-Shot Paradigm. In CHI '21: CHI Conference on Human Factors in Computing Systems, Virtual Event \/ Yokohama Japan, May 8--13, 2021, Extended Abstracts, Yoshifumi Kitamura, Aaron Quigley, Katherine Isbister, and Takeo Igarashi (Eds.). ACM, 314:1--314:7."},{"key":"e_1_3_2_1_106_1","first-page":"28","volume-title":"Leveraging Large Language Models for Multiple Choice Question Answering. In The Eleventh International Conference on Learning Representations, ICLR 2023","author":"Robinson Joshua","year":"2023","unstructured":"Joshua Robinson and David Wingate. 2023. Leveraging Large Language Models for Multiple Choice Question Answering. In The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1--5, 2023. OpenReview.net, bibinfonumpages28 pages."},{"key":"e_1_3_2_1_107_1","volume-title":"On Fine-Grained Relevance Scales. In The 41st International ACM SIGIR Conference on Research & Development in Information Retrieval, SIGIR 2018","author":"Roitero Kevin","year":"2018","unstructured":"Kevin Roitero, Eddy Maddalena, Gianluca Demartini, and Stefano Mizzaro. 2018. On Fine-Grained Relevance Scales. In The 41st International ACM SIGIR Conference on Research & Development in Information Retrieval, SIGIR 2018, Ann Arbor, MI, USA, July 08--12, 2018, Kevyn Collins-Thompson, Qiaozhu Mei, Brian D. Davison, Yiqun Liu, and Emine Yilmaz (Eds.). ACM, 675--684."},{"key":"e_1_3_2_1_108_1","volume-title":"SWAN: A Generic Framework for Auditing Textual Conversational Systems. arXiv 2305.08290.","author":"Sakai Tetsuya","year":"2023","unstructured":"Tetsuya Sakai. 2023. SWAN: A Generic Framework for Auditing Textual Conversational Systems. arXiv 2305.08290."},{"key":"e_1_3_2_1_109_1","doi-asserted-by":"publisher","DOI":"10.1145\/2063576.2063669"},{"key":"e_1_3_2_1_110_1","doi-asserted-by":"publisher","DOI":"10.7759\/cureus.35029"},{"key":"e_1_3_2_1_112_1","volume-title":"Proceedings of the Third AAAI Conference on Human Computation and Crowdsourcing, HCOMP 2015","author":"Sameki Mehrnoosh","year":"2015","unstructured":"Mehrnoosh Sameki, Aditya Barua, and Praveen K. Paritosh. 2016. Rigorously Collecting Commonsense Judgments for Complex Question-Answer Content. In Proceedings of the Third AAAI Conference on Human Computation and Crowdsourcing, HCOMP 2015, November 8--11, 2015, San Diego, California, USA, Volume 3, Elizabeth Gerber and Panos Ipeirotis (Eds.). AAAI Press, 26--33."},{"key":"e_1_3_2_1_113_1","volume-title":"Proceedings of the Second International Conference on Design of Experimental Search & Information REtrieval Systems","author":"David","year":"2021","unstructured":"David P. Sander and Laura Dietz. 2021. EXAM: How to Evaluate Retrieve-and-Generate Systems for Users Who Do Not (Yet) Know What They Want. In Proceedings of the Second International Conference on Design of Experimental Search & Information REtrieval Systems, Padova, Italy, September 15--18, 2021 (CEUR Workshop Proceedings, Vol. 2950), Omar Alonso, Stefano Marchesin, Marc Najork, and Gianmaria Silvello (Eds.). CEUR-WS.org, 136--146."},{"key":"e_1_3_2_1_114_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000009"},{"key":"e_1_3_2_1_115_1","volume-title":"Human Interpretation of Saliency-based Explanation Over Text. In FAccT '22: 2022 ACM Conference on Fairness, Accountability, and Transparency","author":"Schuff Hendrik","year":"2022","unstructured":"Hendrik Schuff, Alon Jacovi, Heike Adel, Yoav Goldberg, and Ngoc Thang Vu. 2022. Human Interpretation of Saliency-based Explanation Over Text. In FAccT '22: 2022 ACM Conference on Fairness, Accountability, and Transparency, Seoul, Republic of Korea, June 21--24, 2022. ACM, 611--636."},{"key":"e_1_3_2_1_116_1","volume-title":"Lam","author":"Semnani Sina J.","year":"2023","unstructured":"Sina J. Semnani, Violet Z. Yao, Heidi C. Zhang, and Monica S. Lam. 2023. WikiChat: A Few-Shot LLM-Based Chatbot Grounded with Wikipedia. arXiv 2305.14292."},{"key":"e_1_3_2_1_117_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.411"},{"key":"e_1_3_2_1_118_1","volume-title":"REPLUG: Retrieval-Augmented Black-Box Language Models. arXiv 2301.12652.","author":"Shi Weijia","year":"2023","unstructured":"Weijia Shi, Sewon Min, Michihiro Yasunaga, Minjoon Seo, Rich James, Mike Lewis, Luke Zettlemoyer, and Wen-tau Yih. 2023. REPLUG: Retrieval-Augmented Black-Box Language Models. arXiv 2301.12652."},{"key":"e_1_3_2_1_119_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.346"},{"key":"e_1_3_2_1_120_1","volume-title":"Proceedings of the 7th Conference of the Association for Machine Translation in the Americas: Technical Papers, AMTA 2006","author":"Snover Matthew G.","year":"2006","unstructured":"Matthew G. Snover, Bonnie J. Dorr, Richard M. Schwartz, Linnea Micciulla, and John Makhoul. 2006. A Study of Translation Edit Rate with Targeted Human Annotation. In Proceedings of the 7th Conference of the Association for Machine Translation in the Americas: Technical Papers, AMTA 2006, Cambridge, Massachusetts, USA, August 8--12, 2006. Association for Machine Translation in the Americas, 223--231."},{"key":"e_1_3_2_1_121_1","volume-title":"Proceedings of the Sixth Workshop on Statistical Machine Translation, WMT@EMNLP 2011","author":"Song Xingyi","year":"2011","unstructured":"Xingyi Song and Trevor Cohn. 2011. Regression and Ranking based Optimisation for Sentence Level MT Evaluation. In Proceedings of the Sixth Workshop on Statistical Machine Translation, WMT@EMNLP 2011, Edinburgh, Scotland, UK, July 30--31, 2011, Chris Callison-Burch, Philipp Koehn, Christof Monz, and Omar Zaidan (Eds.). Association for Computational Linguistics, 123--129."},{"key":"e_1_3_2_1_122_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.60"},{"key":"e_1_3_2_1_123_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.923"},{"key":"e_1_3_2_1_124_1","volume-title":"Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022","author":"Tay Yi","year":"2022","unstructured":"Yi Tay, Vinh Tran, Mostafa Dehghani, Jianmo Ni, Dara Bahri, Harsh Mehta, Zhen Qin, Kai Hui, Zhe Zhao, Jai Prakash Gupta, Tal Schuster, William W. Cohen, and Donald Metzler. 2022. Transformer Memory as a Differentiable Search Index. In Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 -- December 9, 2022, Sanmi Koyejo, S. Mohamed, A. Agarwal, Danielle Belgrave, K. Cho, and A. Oh (Eds.). bibinfonumpages13 pages."},{"key":"e_1_3_2_1_125_1","doi-asserted-by":"crossref","unstructured":"James Thorne. 2022. Data-Efficient Autoregressive Document Retrieval for Fact Verification. arXiv 2211.09388.","DOI":"10.18653\/v1\/2022.sustainlp-1.7"},{"key":"e_1_3_2_1_126_1","doi-asserted-by":"publisher","DOI":"10.1177\/0165551515615833"},{"key":"e_1_3_2_1_127_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.609"},{"key":"e_1_3_2_1_128_1","volume-title":"Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022","author":"Wang Yujing","year":"2022","unstructured":"Yujing Wang, Yingyan Hou, Haonan Wang, Ziming Miao, Shibin Wu, Qi Chen, Yuqing Xia, Chengmin Chi, Guoshuai Zhao, Zheng Liu, Xing Xie, Hao Sun, Weiwei Deng, Qi Zhang, and Mao Yang. 2022. A Neural Corpus Indexer for Document Retrieval. In Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 -- December 9, 2022, Sanmi Koyejo, S. Mohamed, A. Agarwal, Danielle Belgrave, K. Cho, and A. Oh (Eds.). bibinfonumpages15 pages."},{"key":"e_1_3_2_1_129_1","volume-title":"Schmidt","author":"White Jules","year":"2023","unstructured":"Jules White, Quchen Fu, Sam Hays, Michael Sandborn, Carlos Olea, Henry Gilbert, Ashraf Elnashar, Jesse Spencer-Smith, and Douglas C. Schmidt. 2023. A Prompt Pattern Catalog to Enhance Prompt Engineering with ChatGPT. arXiv 2302.11382."},{"key":"e_1_3_2_1_130_1","volume-title":"Wikipedia: Verifiability, not Truth. https:\/\/web.archive.org\/web\/20230627143645\/https:\/\/en.wikipedia.org\/wiki\/Wikipedia:Verifiability,_not_truth. Accessed: 2023-06--27.","author":"Foundation Wikimedia","year":"2023","unstructured":"Wikimedia Foundation. 2023. Wikipedia: Verifiability, not Truth. https:\/\/web.archive.org\/web\/20230627143645\/https:\/\/en.wikipedia.org\/wiki\/Wikipedia:Verifiability,_not_truth. Accessed: 2023-06--27."},{"volume-title":"Interfaces for Information Retrieval","author":"Wilson Max L.","key":"e_1_3_2_1_131_1","unstructured":"Max L. Wilson. 2011. Interfaces for Information Retrieval. In Interactive Information Seeking, Behaviour and Retrieval, Ian Ruthven and Diane Kelly (Eds.). Facet Publishing, 139--170."},{"key":"e_1_3_2_1_132_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583343"},{"key":"e_1_3_2_1_133_1","unstructured":"Chengrun Yang Xuezhi Wang Yifeng Lu Hanxiao Liu Quoc V. Le Denny Zhou and Xinyun Chen. 2023. Large Language Models as Optimizers. arXiv 2309.03409."},{"key":"e_1_3_2_1_134_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3084154"},{"key":"e_1_3_2_1_135_1","first-page":"27","volume-title":"The Eleventh International Conference on Learning Representations, ICLR 2023","author":"Yu Wenhao","year":"2023","unstructured":"Wenhao Yu, Dan Iter, Shuohang Wang, Yichong Xu, Mingxuan Ju, Soumya Sanyal, Chenguang Zhu, Michael Zeng, and Meng Jiang. 2023. Generate Rather Than Retrieve: Large Language Models are Strong Context Generators. In The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1--5, 2023. OpenReview.net, bibinfonumpages27 pages."},{"key":"e_1_3_2_1_136_1","volume-title":"Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021","author":"Yuan Weizhe","year":"2021","unstructured":"Weizhe Yuan, Graham Neubig, and Pengfei Liu. 2021. BARTScore: Evaluating Generated Text as Text Generation. In Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6--14, 2021, virtual, Marc'Aurelio Ranzato, Alina Beygelzimer, Yann N. Dauphin, Percy Liang, and Jennifer Wortman Vaughan (Eds.). 27263--27277."},{"key":"e_1_3_2_1_137_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.307"},{"key":"e_1_3_2_1_138_1","volume-title":"Proceedings of the 43rd International ACM SIGIR conference on research and development in Information Retrieval, SIGIR 2020","author":"Zamani Hamed","year":"2020","unstructured":"Hamed Zamani, Bhaskar Mitra, Everest Chen, Gord Lueck, Fernando Diaz, Paul N. Bennett, Nick Craswell, and Susan T. Dumais. 2020. Analyzing and Learning from User Interactions for Search Clarification. In Proceedings of the 43rd International ACM SIGIR conference on research and development in Information Retrieval, SIGIR 2020, Virtual Event, China, July 25--30, 2020, Jimmy X. Huang, Yi Chang, Xueqi Cheng, Jaap Kamps, Vanessa Murdock, Ji-Rong Wen, and Yiqun Liu (Eds.). ACM, 1181--1190."},{"key":"e_1_3_2_1_139_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557711"},{"key":"e_1_3_2_1_140_1","unstructured":"Dake Zhang and Ronak Pradeep. 2023. ReadProbe: A Demo of Retrieval-Enhanced Large Language Models to Support Lateral Reading. arXiv 2306.07875."},{"key":"e_1_3_2_1_141_1","volume-title":"Chatty Goose: A Python Framework for Conversational Search. In SIGIR '21: The 44th International ACM SIGIR Conference on Research and Development in Information Retrieval","author":"Zhang Edwin","year":"2021","unstructured":"Edwin Zhang, Sheng-Chieh Lin, Jheng-Hong Yang, Ronak Pradeep, Rodrigo Frassetto Nogueira, and Jimmy Lin. 2021a. Chatty Goose: A Python Framework for Conversational Search. In SIGIR '21: The 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, Canada, July 11--15, 2021, Fernando Diaz, Chirag Shah, Torsten Suel, Pablo Castells, Rosie Jones, and Tetsuya Sakai (Eds.). ACM, 2521--2525."},{"key":"e_1_3_2_1_142_1","first-page":"43","volume-title":"BERTScore: Evaluating Text Generation with BERT. In 8th International Conference on Learning Representations, ICLR 2020","author":"Zhang Tianyi","year":"2020","unstructured":"Tianyi Zhang, Varsha Kishore, Felix Wu, Kilian Q. Weinberger, and Yoav Artzi. 2020. BERTScore: Evaluating Text Generation with BERT. In 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, April 26--30, 2020. OpenReview.net, bibinfonumpages43 pages."},{"key":"e_1_3_2_1_143_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441758"},{"key":"e_1_3_2_1_144_1","volume-title":"Bosheng Ding, and Lidong Bing.","author":"Zhao Ruochen","year":"2023","unstructured":"Ruochen Zhao, Xingxuan Li, Yew Ken Chia, Bosheng Ding, and Lidong Bing. 2023. Can ChatGPT-like Generative Models Guarantee Factual Accuracy? On the Mistakes of New Generation Search Engines. arXiv 2304.11076."},{"key":"e_1_3_2_1_145_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1053"},{"key":"e_1_3_2_1_146_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10791-011-9178-4"},{"key":"e_1_3_2_1_147_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331231"},{"key":"e_1_3_2_1_148_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11633-022-1373-9"},{"key":"e_1_3_2_1_149_1","volume-title":"Proceedings of the 14th International Conference on Information Quality, ICIQ 2009, Hasso Plattner Institute","author":"Zhu Zhemin","year":"2009","unstructured":"Zhemin Zhu, Delphine Bernhard, and Iryna Gurevych. 2009. A Multi-Dimensional Model for Assessing the Quality of Answers in Social Q&A Sites. In Proceedings of the 14th International Conference on Information Quality, ICIQ 2009, Hasso Plattner Institute, University of Potsdam, Germany, November 7--8 2009, Paul L. Bowen, Ahmed K. Elmagarmid, Hubert \u00d6 sterle, and Kai-Uwe Sattler (Eds.). HPI\/MIT, 264--265."},{"key":"e_1_3_2_1_150_1","doi-asserted-by":"crossref","unstructured":"Honglei Zhuang Zhen Qin Kai Hui Junru Wu Le Yan Xuanhui Wang and Michael Berdersky. 2023. Beyond Yes and No: Improving Zero-Shot LLM Rankers via Scoring Fine-Grained Relevance Labels. arXiv 2310.14122.","DOI":"10.18653\/v1\/2024.naacl-short.31"},{"key":"e_1_3_2_1_151_1","unstructured":"Shengyao Zhuang Houxing Ren Linjun Shou Jian Pei Ming Gong Guido Zuccon and Daxin Jiang. 2022. Bridging the Gap Between Indexing and Retrieval for Differentiable Search Index with Query Generation. arXiv 2206.10128."},{"key":"e_1_3_2_1_152_1","unstructured":"Shengyao Zhuang and Guido Zuccon. 2021. Fast Passage Re-ranking with Contextualized Exact Term Matching and Efficient Passage Expansion. arXiv 2108.08513."},{"key":"e_1_3_2_1_153_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.167"}],"event":{"name":"SIGIR 2024: The 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Washington DC USA","acronym":"SIGIR 2024"},"container-title":["Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626772.3657849","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,18]],"date-time":"2024-10-18T06:03:31Z","timestamp":1729231411000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657849"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,10]]},"references-count":152,"alternative-id":["10.1145\/3626772.3657849","10.1145\/3626772"],"URL":"https:\/\/doi.org\/10.1145\/3626772.3657849","relation":{},"subject":[],"published":{"date-parts":[[2024,7,10]]},"assertion":[{"value":"2024-07-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}