@inproceedings{volske-etal-2017-tl,
title = "{TL};{DR}: Mining {R}eddit to Learn Automatic Summarization",
author = {V{\"o}lske, Michael and
Potthast, Martin and
Syed, Shahbaz and
Stein, Benno},
editor = "Wang, Lu and
Cheung, Jackie Chi Kit and
Carenini, Giuseppe and
Liu, Fei",
booktitle = "Proceedings of the Workshop on New Frontiers in Summarization",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-4508/",
doi = "10.18653/v1/W17-4508",
pages = "59--63",
abstract = "Recent advances in automatic text summarization have used deep neural networks to generate high-quality abstractive summaries, but the performance of these models strongly depends on large amounts of suitable training data. We propose a new method for mining social media for author-provided summaries, taking advantage of the common practice of appending a {\textquotedblleft}TL;DR{\textquotedblright} to long posts. A case study using a large Reddit crawl yields the Webis-TLDR-17 dataset, complementing existing corpora primarily from the news genre. Our technique is likely applicable to other social media sites and general web crawls."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="volske-etal-2017-tl">
<titleInfo>
<title>TL;DR: Mining Reddit to Learn Automatic Summarization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Völske</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="family">Potthast</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shahbaz</namePart>
<namePart type="family">Syed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benno</namePart>
<namePart type="family">Stein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on New Frontiers in Summarization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jackie</namePart>
<namePart type="given">Chi</namePart>
<namePart type="given">Kit</namePart>
<namePart type="family">Cheung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giuseppe</namePart>
<namePart type="family">Carenini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fei</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent advances in automatic text summarization have used deep neural networks to generate high-quality abstractive summaries, but the performance of these models strongly depends on large amounts of suitable training data. We propose a new method for mining social media for author-provided summaries, taking advantage of the common practice of appending a “TL;DR” to long posts. A case study using a large Reddit crawl yields the Webis-TLDR-17 dataset, complementing existing corpora primarily from the news genre. Our technique is likely applicable to other social media sites and general web crawls.</abstract>
<identifier type="citekey">volske-etal-2017-tl</identifier>
<identifier type="doi">10.18653/v1/W17-4508</identifier>
<location>
<url>https://aclanthology.org/W17-4508/</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>59</start>
<end>63</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TL;DR: Mining Reddit to Learn Automatic Summarization
%A Völske, Michael
%A Potthast, Martin
%A Syed, Shahbaz
%A Stein, Benno
%Y Wang, Lu
%Y Cheung, Jackie Chi Kit
%Y Carenini, Giuseppe
%Y Liu, Fei
%S Proceedings of the Workshop on New Frontiers in Summarization
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F volske-etal-2017-tl
%X Recent advances in automatic text summarization have used deep neural networks to generate high-quality abstractive summaries, but the performance of these models strongly depends on large amounts of suitable training data. We propose a new method for mining social media for author-provided summaries, taking advantage of the common practice of appending a “TL;DR” to long posts. A case study using a large Reddit crawl yields the Webis-TLDR-17 dataset, complementing existing corpora primarily from the news genre. Our technique is likely applicable to other social media sites and general web crawls.
%R 10.18653/v1/W17-4508
%U https://aclanthology.org/W17-4508/
%U https://doi.org/10.18653/v1/W17-4508
%P 59-63
Markdown (Informal)
[TL;DR: Mining Reddit to Learn Automatic Summarization](https://aclanthology.org/W17-4508/) (Völske et al., 2017)
ACL
- Michael Völske, Martin Potthast, Shahbaz Syed, and Benno Stein. 2017. TL;DR: Mining Reddit to Learn Automatic Summarization. In Proceedings of the Workshop on New Frontiers in Summarization, pages 59–63, Copenhagen, Denmark. Association for Computational Linguistics.