{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T13:52:11Z","timestamp":1725544331520},"publisher-location":"Berlin, Heidelberg","reference-count":32,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540334279"},{"type":"electronic","value":"9783540334286"}],"license":[{"start":{"date-parts":[[2006,1,1]],"date-time":"2006-01-01T00:00:00Z","timestamp":1136073600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2006]]},"DOI":"10.1007\/11736790_18","type":"book-chapter","created":{"date-parts":[[2006,4,6]],"date-time":"2006-04-06T15:10:48Z","timestamp":1144336248000},"page":"309-331","source":"Crossref","is-referenced-by-count":1,"title":["Evaluating Semantic Evaluations: How RTE Measures Up"],"prefix":"10.1007","author":[{"given":"Sam","family":"Bayer","sequence":"first","affiliation":[]},{"given":"John","family":"Burger","sequence":"additional","affiliation":[]},{"given":"Lisa","family":"Ferro","sequence":"additional","affiliation":[]},{"given":"John","family":"Henderson","sequence":"additional","affiliation":[]},{"given":"Lynette","family":"Hirschman","sequence":"additional","affiliation":[]},{"given":"Alex","family":"Yeh","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"18_CR1","unstructured":"Aberdeen, J., Condon, S., Doran, C., Harper, L., Oshika, B., Phillips, J.: Evaluation of speech-to-speech translation systems (2005) (unpublished manuscript)"},{"key":"18_CR2","unstructured":"Aberdeen, J., Hirschman, L., Walker, M.: Evaluation for DARPA Communicator spoken dialogue systems. In: Proceedings of the 2nd Conference on Language Resources and Evaluation (2000)"},{"key":"18_CR3","unstructured":"Bayer, S., Burger, J., Ferro, L., Henderson, J., Yeh, A.: MITRE\u2019s submissions to the EU Pascal RTE challenge. In: PASCAL Proceedings of the First Challenge Workshop, Recognizing Textual Entailment, Southampton, U.K. (2005)"},{"key":"18_CR4","unstructured":"Bayer, S., Burger, J., Greiff, W., Wellner, B.: The MITRE logical form generation system. In: Proceedings of Senseval-3: The Third International Workshop on the Evaluation of Systems for the Semantic Analysis of Text, pp. 69\u201372 (2004)"},{"key":"18_CR5","doi-asserted-by":"crossref","unstructured":"Bond, T.G., Fox, C.M.: Applying the Rasch Model: Fundamental Measurement in the Human Sciences. University of Toledo Press (2001)","DOI":"10.4324\/9781410600127"},{"key":"18_CR6","unstructured":"Bos, J., Markert, K.: Combining shallow and deep NLP methods for recognizing textual entailment. In: PASCAL Proceedings of the First Challenge Workshop, Recognizing Textual Entailment, Southampton, U.K. (2005)"},{"key":"18_CR7","doi-asserted-by":"crossref","unstructured":"Brachman, R. (AA)AI: More than the sum of its parts. AAAI Presidential Address. In: Presented at AAAI 2005 (2005)","DOI":"10.12968\/indn.2005.1.2.73920"},{"key":"18_CR8","unstructured":"Brown, P.F., Della Pietra, S.A., Della Pietra, V.J., Mercer, R.L.: The mathematics of statistical machine translation. Computational Linguistics\u00a019 (1993)"},{"key":"18_CR9","doi-asserted-by":"crossref","unstructured":"Burger, J., Ferro, L.: Generating an entailment corpus from news headlines. In: ACL Workshop on Empirical Modeling of Semantic Equivalence and Entailment, Ann Arbor, MI (2005)","DOI":"10.3115\/1631862.1631871"},{"key":"18_CR10","doi-asserted-by":"crossref","unstructured":"Dagan, I., Glickman, O., Magnini, B.: The PASCAL recognizing textual entailment challenge. In: PASCAL Proceedings of the First Challenge Workshop, Recognizing Textual Entailment, Southampton, U.K. (2005)","DOI":"10.1007\/11736790_9"},{"key":"18_CR11","doi-asserted-by":"crossref","unstructured":"Damianos, L., Wohlever, S., Kozierok, R., Ponte, J.: MiTAP for real users, real data, real problems. In: Proceedings of the Conference on Human Factors of Computing Systems, Fort Lauderdale, FL (2003)","DOI":"10.1145\/765891.765913"},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"Deshmukh, N., Duncan, R., Ganapathiraju, A., Picone, J.: Benchmarking human performance for continuous speech recognition. In: Proceedings of the Fourth International Conference on Spoken Language Processing, Philadelphia, Pennsylvania, USA, pp. 2486\u20132489 (1996)","DOI":"10.1109\/ICSLP.1996.607317"},{"key":"18_CR13","unstructured":"Dolan, B., Brockett, C., Quirk, C.: Microsoft Research paraphrase corpus (2005), http:\/\/research.microsoft.com\/research\/nlp\/msr_paraphrase.htm"},{"key":"18_CR14","unstructured":"Graff, D.: English Gigaword (2003), http:\/\/www.ldc.upenn.edu\/Catalog\/CatalogEntry.jsp?catalogId=LDC2003T05"},{"volume-title":"Proceedings of the Sixth Message Understanding Conference (MUC-6)","year":"1995","author":"R. Grishman","key":"18_CR15","unstructured":"Grishman, R., Sundheim, B.: Design of the MUC-6 evaluation. In: Proceedings of the Sixth Message Understanding Conference (MUC-6), Columbia, MD NIST. Morgan Kaufmann, San Francisco (1995)"},{"key":"18_CR16","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511574931","volume-title":"Algorithms on Strings, Trees and Sequences","author":"D. Gusfield","year":"1997","unstructured":"Gusfield, D.: Algorithms on Strings, Trees and Sequences. Cambridge University Press, Cambridge (1997)"},{"key":"18_CR17","unstructured":"Henderson, J., Morgan, W.: Paris: an automated MT evaluation metric toolkit; and a survey of metric performance on the segment ranking task. Technical report, MITRE (2005) (to appear)"},{"key":"18_CR18","doi-asserted-by":"publisher","first-page":"281","DOI":"10.1006\/csla.1998.0102","volume":"12","author":"L. Hirschman","year":"1998","unstructured":"Hirschman, L.: The evolution of evaluation: Lessons from the message understanding conferences. Computer Speech and Language\u00a012, 281\u2013305 (1998)","journal-title":"Computer Speech and Language"},{"key":"18_CR19","unstructured":"Hirschman, L.: Language understanding evaluations: Lessons learned from MUC and ATIS. In: Proceedings of LREC 1998, Granada (1998)"},{"key":"18_CR20","doi-asserted-by":"crossref","unstructured":"Hirschman, L., Bates, M., Dahl, D., Fisher, W.M., Garafolo, J., Pallet, D.S., Hunicke- Smith, K., Price, P., Rudnicky, A., Tzoukermann, E.: Multisite data collection and evaluation in spoken language understanding. In: Proceedings of the DARPA Workshop on Human Language Technology, Princeton, NJ, pp. 19\u201324 (1993)","DOI":"10.3115\/1075671.1075676"},{"key":"18_CR21","doi-asserted-by":"crossref","unstructured":"Hirschman, L., Light, M., Breck, E., Burger, J.D.: Deep Read: A reading comprehension system. In: Proceedings of the 37th Annual Meeting of the Association for Computational Linguistics (1999)","DOI":"10.3115\/1034678.1034731"},{"key":"18_CR22","doi-asserted-by":"crossref","unstructured":"Hirschman, L., Yeh, A., Blaschke, C., Valencia, A.: Overview of BioCreAtIvE: Critical assessment of information extraction for biology. BMC Bioinformatics\u00a06(suppl. 1) (2005)","DOI":"10.1186\/1471-2105-6-S1-S1"},{"key":"18_CR23","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-0907-3","volume-title":"Learning to Classify Text Using Support Vector Machines","author":"T. Joachims","year":"2002","unstructured":"Joachims, T.: Learning to Classify Text Using Support Vector Machines. Kluwer, Dordrecht (2002)"},{"key":"18_CR24","unstructured":"Lange, R., Moran, J., Greiff, W., Ferro, L.: A probabilistic Rasch analysis of question answering evaluations. In: Proceedings of HLT-NAACL 2004, pp. 65\u201372 (2004)"},{"key":"18_CR25","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1017\/S1351324901002819","volume":"7","author":"M. Light","year":"2001","unstructured":"Light, M., Mann, G.S., Riloff, E., Breck, E.: Analyses for elucidating current question answering technology. Natural Language Engineering\u00a07, 325\u2013342 (2001)","journal-title":"Natural Language Engineering"},{"key":"18_CR26","doi-asserted-by":"publisher","first-page":"396","DOI":"10.1016\/j.jbi.2004.08.010","volume":"37","author":"A. Morgan","year":"2004","unstructured":"Morgan, A., Hirschman, L., Colosimo, M., Yeh, A., Colombe, J.: Gene name identification and normalization using a model organism database. Journal of Biomedical Informatics\u00a037, 396\u2013410 (2004)","journal-title":"Journal of Biomedical Informatics"},{"key":"18_CR27","doi-asserted-by":"crossref","unstructured":"Och, F.J., Ney, H.: A systematic comparison of various statistical alignment models. Computational Linguistics\u00a029 (2003)","DOI":"10.1162\/089120103321337421"},{"key":"18_CR28","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Henderson, J., Reeder, F.: Corpus-based comprehensive and diagnostic MT evaluation: Initial Arabic, Chinese, French, and Spanish results. In: Proceedings of the 2002 Conference on Human Language Technology, San Diego, CA, pp. 124\u2013127 (2002)","DOI":"10.3115\/1289189.1289272"},{"volume-title":"Proceedings of the Sixth Message Understanding Conference (MUC-6)","year":"1995","author":"B. Sundheim","key":"18_CR29","unstructured":"Sundheim, B.: Overview of results of the MUC-6 evaluation. In: Proceedings of the Sixth Message Understanding Conference (MUC-6), Columbia, MD. NIST. Morgan Kaufmann, San Francisco (1995)"},{"key":"18_CR30","unstructured":"Tong, S., Koller, D.: Support vector machine active learning with applications to text classification. In: Proceedings of ICML 2000, 17th International Conference on Machine Learning (2000)"},{"key":"18_CR31","doi-asserted-by":"crossref","unstructured":"Walker, M., Aberdeen, J., Boland, J., Bratt, E., Garofolo, J., Hirschman, L., Le, A., Lee, S., Narayanan, S., Papineni, K., Pellom, B., Polifroni, J., Potamianos, A., Prabhu, P., Rudnicky, A., Sanders, G., Seneff, S., Stallard, D., Whittaker, S.: DARPA Communicator dialog travel planning systems: The June 2000 data collection. In: Proceedings of Eurospeech 2001, Aalborg, Denmark (2001)","DOI":"10.21437\/Eurospeech.2001-355"},{"key":"18_CR32","doi-asserted-by":"crossref","unstructured":"Wellner, B., Ferro, L., Greiff, W., Hirschman, L.: Reading comprehension tests for computer-based understanding evaluation. Natural Language Engineering (2005) (to appear)","DOI":"10.1017\/S1351324905004018"}],"container-title":["Lecture Notes in Computer Science","Machine Learning Challenges. Evaluating Predictive Uncertainty, Visual Object Classification, and Recognising Tectual Entailment"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/11736790_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,6]],"date-time":"2023-05-06T23:17:33Z","timestamp":1683415053000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/11736790_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2006]]},"ISBN":["9783540334279","9783540334286"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/11736790_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2006]]}}}