@inproceedings{slobodkin-etal-2022-controlled,
title = "Controlled Text Reduction",
author = "Slobodkin, Aviv and
Roit, Paul and
Hirsch, Eran and
Ernst, Ori and
Dagan, Ido",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-main.385/",
doi = "10.18653/v1/2022.emnlp-main.385",
pages = "5699--5715",
abstract = "Producing a reduced version of a source text, as in generic or focused summarization, inherently involves two distinct subtasks: deciding on targeted content and generating a coherent text conveying it. While some popular approaches address summarization as a single end-to-end task, prominent works support decomposed modeling for individual subtasks. Further, semi-automated text reduction is also very appealing, where users may identify targeted content while models would generate a corresponding coherent summary.In this paper, we focus on the second subtask, of generating coherent text given pre-selected content. Concretely, we formalize \textit{Controlled Text Reduction} as a standalone task, whose input is a source text with marked spans of targeted content ({\textquotedblleft}highlighting{\textquotedblright}).A model then needs to generate a coherent text that includes all and only the target information.We advocate the potential of such models, both for modular fully-automatic summarization, as well as for semi-automated human-in-the-loop use cases.Facilitating proper research, we crowdsource high-quality dev and test datasets for the task. Further, we automatically generate a larger {\textquotedblleft}silver{\textquotedblright} training dataset from available summarization benchmarks, leveraging a pretrained summary-source alignment model.Finally, employing these datasets, we present a supervised baseline model, showing promising results and insightful analyses."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="slobodkin-etal-2022-controlled">
<titleInfo>
<title>Controlled Text Reduction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aviv</namePart>
<namePart type="family">Slobodkin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Roit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eran</namePart>
<namePart type="family">Hirsch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ori</namePart>
<namePart type="family">Ernst</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ido</namePart>
<namePart type="family">Dagan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Producing a reduced version of a source text, as in generic or focused summarization, inherently involves two distinct subtasks: deciding on targeted content and generating a coherent text conveying it. While some popular approaches address summarization as a single end-to-end task, prominent works support decomposed modeling for individual subtasks. Further, semi-automated text reduction is also very appealing, where users may identify targeted content while models would generate a corresponding coherent summary. In this paper, we focus on the second subtask, of generating coherent text given pre-selected content. Concretely, we formalize Controlled Text Reduction as a standalone task, whose input is a source text with marked spans of targeted content (“highlighting”). A model then needs to generate a coherent text that includes all and only the target information. We advocate the potential of such models, both for modular fully-automatic summarization, as well as for semi-automated human-in-the-loop use cases. Facilitating proper research, we crowdsource high-quality dev and test datasets for the task. Further, we automatically generate a larger “silver” training dataset from available summarization benchmarks, leveraging a pretrained summary-source alignment model. Finally, employing these datasets, we present a supervised baseline model, showing promising results and insightful analyses.</abstract>
<identifier type="citekey">slobodkin-etal-2022-controlled</identifier>
<identifier type="doi">10.18653/v1/2022.emnlp-main.385</identifier>
<location>
<url>https://aclanthology.org/2022.emnlp-main.385/</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>5699</start>
<end>5715</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Controlled Text Reduction
%A Slobodkin, Aviv
%A Roit, Paul
%A Hirsch, Eran
%A Ernst, Ori
%A Dagan, Ido
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F slobodkin-etal-2022-controlled
%X Producing a reduced version of a source text, as in generic or focused summarization, inherently involves two distinct subtasks: deciding on targeted content and generating a coherent text conveying it. While some popular approaches address summarization as a single end-to-end task, prominent works support decomposed modeling for individual subtasks. Further, semi-automated text reduction is also very appealing, where users may identify targeted content while models would generate a corresponding coherent summary. In this paper, we focus on the second subtask, of generating coherent text given pre-selected content. Concretely, we formalize Controlled Text Reduction as a standalone task, whose input is a source text with marked spans of targeted content (“highlighting”). A model then needs to generate a coherent text that includes all and only the target information. We advocate the potential of such models, both for modular fully-automatic summarization, as well as for semi-automated human-in-the-loop use cases. Facilitating proper research, we crowdsource high-quality dev and test datasets for the task. Further, we automatically generate a larger “silver” training dataset from available summarization benchmarks, leveraging a pretrained summary-source alignment model. Finally, employing these datasets, we present a supervised baseline model, showing promising results and insightful analyses.
%R 10.18653/v1/2022.emnlp-main.385
%U https://aclanthology.org/2022.emnlp-main.385/
%U https://doi.org/10.18653/v1/2022.emnlp-main.385
%P 5699-5715
Markdown (Informal)
[Controlled Text Reduction](https://aclanthology.org/2022.emnlp-main.385/) (Slobodkin et al., EMNLP 2022)
ACL
Aviv Slobodkin, Paul Roit, Eran Hirsch, Ori Ernst, and Ido Dagan. 2022. Controlled Text Reduction. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pages 5699–5715, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.