BibTeX
@inproceedings{ning-etal-2020-torque,
title = "{TORQUE}: A Reading Comprehension Dataset of Temporal Ordering Questions",
author = "Ning, Qiang and
Wu, Hao and
Han, Rujun and
Peng, Nanyun and
Gardner, Matt and
Roth, Dan",
editor = "Webber, Bonnie and
Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.emnlp-main.88/",
doi = "10.18653/v1/2020.emnlp-main.88",
pages = "1158--1172",
abstract = "A critical part of reading is being able to understand the temporal relationships between events described in a passage of text, even when those relationships are not explicitly stated. However, current machine reading comprehension benchmarks have practically no questions that test temporal phenomena, so systems trained on these benchmarks have no capacity to answer questions such as {\textquotedblleft}what happened before/after [some event]?{\textquotedblright} We introduce TORQUE, a new English reading comprehension benchmark built on 3.2k news snippets with 21k human-generated questions querying temporal relationships. Results show that RoBERTa-large achieves an exact-match score of 51{\%} on the test set of TORQUE, about 30{\%} behind human performance."
}
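For convenience, a minimal LaTeX usage sketch for the BibTeX record above; the file name refs.bib and the plain bibliography style are illustrative assumptions, not part of the Anthology export:

\documentclass{article}
\begin{document}
% Cite the entry by the citekey given in the record above.
TORQUE \cite{ning-etal-2020-torque} targets temporal ordering questions.
% Assumption: the BibTeX entry above has been saved to refs.bib.
\bibliographystyle{plain}
\bibliography{refs}
\end{document}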
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ning-etal-2020-torque">
<titleInfo>
<title>TORQUE: A Reading Comprehension Dataset of Temporal Ordering Questions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Qiang</namePart>
<namePart type="family">Ning</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hao</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rujun</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nanyun</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matt</namePart>
<namePart type="family">Gardner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Roth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bonnie</namePart>
<namePart type="family">Webber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A critical part of reading is being able to understand the temporal relationships between events described in a passage of text, even when those relationships are not explicitly stated. However, current machine reading comprehension benchmarks have practically no questions that test temporal phenomena, so systems trained on these benchmarks have no capacity to answer questions such as “what happened before/after [some event]?” We introduce TORQUE, a new English reading comprehension benchmark built on 3.2k news snippets with 21k human-generated questions querying temporal relationships. Results show that RoBERTa-large achieves an exact-match score of 51% on the test set of TORQUE, about 30% behind human performance.</abstract>
<identifier type="citekey">ning-etal-2020-torque</identifier>
<identifier type="doi">10.18653/v1/2020.emnlp-main.88</identifier>
<location>
<url>https://aclanthology.org/2020.emnlp-main.88/</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>1158</start>
<end>1172</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T TORQUE: A Reading Comprehension Dataset of Temporal Ordering Questions
%A Ning, Qiang
%A Wu, Hao
%A Han, Rujun
%A Peng, Nanyun
%A Gardner, Matt
%A Roth, Dan
%Y Webber, Bonnie
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F ning-etal-2020-torque
%X A critical part of reading is being able to understand the temporal relationships between events described in a passage of text, even when those relationships are not explicitly stated. However, current machine reading comprehension benchmarks have practically no questions that test temporal phenomena, so systems trained on these benchmarks have no capacity to answer questions such as “what happened before/after [some event]?” We introduce TORQUE, a new English reading comprehension benchmark built on 3.2k news snippets with 21k human-generated questions querying temporal relationships. Results show that RoBERTa-large achieves an exact-match score of 51% on the test set of TORQUE, about 30% behind human performance.
%R 10.18653/v1/2020.emnlp-main.88
%U https://aclanthology.org/2020.emnlp-main.88/
%U https://doi.org/10.18653/v1/2020.emnlp-main.88
%P 1158-1172
Markdown (Informal)
[TORQUE: A Reading Comprehension Dataset of Temporal Ordering Questions](https://aclanthology.org/2020.emnlp-main.88/) (Ning et al., EMNLP 2020)
ACL
Qiang Ning, Hao Wu, Rujun Han, Nanyun Peng, Matt Gardner, and Dan Roth. 2020. TORQUE: A Reading Comprehension Dataset of Temporal Ordering Questions. In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pages 1158–1172, Online. Association for Computational Linguistics.