@inproceedings{brook-weiss-etal-2022-extending,
title = "Extending Multi-Text Sentence Fusion Resources via Pyramid Annotations",
author = "Brook Weiss, Daniela and
Roit, Paul and
Ernst, Ori and
Dagan, Ido",
editor = "Carpuat, Marine and
de Marneffe, Marie-Catherine and
Meza Ruiz, Ivan Vladimir",
booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jul,
year = "2022",
address = "Seattle, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.naacl-main.135/",
doi = "10.18653/v1/2022.naacl-main.135",
pages = "1854--1860",
abstract = "NLP models that process multiple texts often struggle in recognizing corresponding and salient information that is often differently phrased, and consolidating the redundancies across texts. To facilitate research of such challenges, the sentence fusion task was proposed, yet previous datasets for this task were very limited in their size and scope. In this paper, we revisit and substantially extend previous dataset creation efforts. With careful modifications, relabeling, and employing complementing data sources, we were able to more than triple the size of a notable earlier dataset. Moreover, we show that our extended version uses more representative texts for multi-document tasks and provides a more diverse training set, which substantially improves model performance."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="brook-weiss-etal-2022-extending">
<titleInfo>
<title>Extending Multi-Text Sentence Fusion Resources via Pyramid Annotations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daniela</namePart>
<namePart type="family">Brook Weiss</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Roit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ori</namePart>
<namePart type="family">Ernst</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ido</namePart>
<namePart type="family">Dagan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="given">Vladimir</namePart>
<namePart type="family">Meza Ruiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>NLP models that process multiple texts often struggle in recognizing corresponding and salient information that is often differently phrased, and consolidating the redundancies across texts. To facilitate research of such challenges, the sentence fusion task was proposed, yet previous datasets for this task were very limited in their size and scope. In this paper, we revisit and substantially extend previous dataset creation efforts. With careful modifications, relabeling, and employing complementing data sources, we were able to more than triple the size of a notable earlier dataset. Moreover, we show that our extended version uses more representative texts for multi-document tasks and provides a more diverse training set, which substantially improves model performance.</abstract>
<identifier type="citekey">brook-weiss-etal-2022-extending</identifier>
<identifier type="doi">10.18653/v1/2022.naacl-main.135</identifier>
<location>
<url>https://aclanthology.org/2022.naacl-main.135/</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>1854</start>
<end>1860</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Extending Multi-Text Sentence Fusion Resources via Pyramid Annotations
%A Brook Weiss, Daniela
%A Roit, Paul
%A Ernst, Ori
%A Dagan, Ido
%Y Carpuat, Marine
%Y de Marneffe, Marie-Catherine
%Y Meza Ruiz, Ivan Vladimir
%S Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F brook-weiss-etal-2022-extending
%X NLP models that process multiple texts often struggle in recognizing corresponding and salient information that is often differently phrased, and consolidating the redundancies across texts. To facilitate research of such challenges, the sentence fusion task was proposed, yet previous datasets for this task were very limited in their size and scope. In this paper, we revisit and substantially extend previous dataset creation efforts. With careful modifications, relabeling, and employing complementing data sources, we were able to more than triple the size of a notable earlier dataset. Moreover, we show that our extended version uses more representative texts for multi-document tasks and provides a more diverse training set, which substantially improves model performance.
%R 10.18653/v1/2022.naacl-main.135
%U https://aclanthology.org/2022.naacl-main.135/
%U https://doi.org/10.18653/v1/2022.naacl-main.135
%P 1854-1860
Markdown (Informal)
[Extending Multi-Text Sentence Fusion Resources via Pyramid Annotations](https://aclanthology.org/2022.naacl-main.135/) (Brook Weiss et al., NAACL 2022)
ACL
- Daniela Brook Weiss, Paul Roit, Ori Ernst, and Ido Dagan. 2022. Extending Multi-Text Sentence Fusion Resources via Pyramid Annotations. In Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pages 1854–1860, Seattle, United States. Association for Computational Linguistics.