@inproceedings{falke-etal-2020-leveraging,
title = "Leveraging User Paraphrasing Behavior In Dialog Systems To Automatically Collect Annotations For Long-Tail Utterances",
author = "Falke, Tobias and
Boese, Markus and
Sorokin, Daniil and
Tirkaz, Caglar and
Lehnen, Patrick",
editor = "Clifton, Ann and
Napoles, Courtney",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics: Industry Track",
month = dec,
year = "2020",
address = "Online",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2020.coling-industry.3/",
doi = "10.18653/v1/2020.coling-industry.3",
pages = "21--32",
abstract = "In large-scale commercial dialog systems, users express the same request in a wide variety of alternative ways with a long tail of less frequent alternatives. Handling the full range of this distribution is challenging, in particular when relying on manual annotations. However, the same users also provide useful implicit feedback as they often paraphrase an utterance if the dialog system failed to understand it. We propose MARUPA, a method to leverage this type of feedback by creating annotated training examples from it. MARUPA creates new data in a fully automatic way, without manual intervention or effort from annotators, and specifically for currently failing utterances. By re-training the dialog system on this new data, accuracy and coverage for long-tail utterances can be improved. In experiments, we study the effectiveness of this approach in a commercial dialog system across various domains and three languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="falke-etal-2020-leveraging">
<titleInfo>
<title>Leveraging User Paraphrasing Behavior In Dialog Systems To Automatically Collect Annotations For Long-Tail Utterances</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tobias</namePart>
<namePart type="family">Falke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Markus</namePart>
<namePart type="family">Boese</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniil</namePart>
<namePart type="family">Sorokin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Caglar</namePart>
<namePart type="family">Tirkaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrick</namePart>
<namePart type="family">Lehnen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 28th International Conference on Computational Linguistics: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ann</namePart>
<namePart type="family">Clifton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Courtney</namePart>
<namePart type="family">Napoles</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In large-scale commercial dialog systems, users express the same request in a wide variety of alternative ways with a long tail of less frequent alternatives. Handling the full range of this distribution is challenging, in particular when relying on manual annotations. However, the same users also provide useful implicit feedback as they often paraphrase an utterance if the dialog system failed to understand it. We propose MARUPA, a method to leverage this type of feedback by creating annotated training examples from it. MARUPA creates new data in a fully automatic way, without manual intervention or effort from annotators, and specifically for currently failing utterances. By re-training the dialog system on this new data, accuracy and coverage for long-tail utterances can be improved. In experiments, we study the effectiveness of this approach in a commercial dialog system across various domains and three languages.</abstract>
<identifier type="citekey">falke-etal-2020-leveraging</identifier>
<identifier type="doi">10.18653/v1/2020.coling-industry.3</identifier>
<location>
<url>https://aclanthology.org/2020.coling-industry.3/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>21</start>
<end>32</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Leveraging User Paraphrasing Behavior In Dialog Systems To Automatically Collect Annotations For Long-Tail Utterances
%A Falke, Tobias
%A Boese, Markus
%A Sorokin, Daniil
%A Tirkaz, Caglar
%A Lehnen, Patrick
%Y Clifton, Ann
%Y Napoles, Courtney
%S Proceedings of the 28th International Conference on Computational Linguistics: Industry Track
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Online
%F falke-etal-2020-leveraging
%X In large-scale commercial dialog systems, users express the same request in a wide variety of alternative ways with a long tail of less frequent alternatives. Handling the full range of this distribution is challenging, in particular when relying on manual annotations. However, the same users also provide useful implicit feedback as they often paraphrase an utterance if the dialog system failed to understand it. We propose MARUPA, a method to leverage this type of feedback by creating annotated training examples from it. MARUPA creates new data in a fully automatic way, without manual intervention or effort from annotators, and specifically for currently failing utterances. By re-training the dialog system on this new data, accuracy and coverage for long-tail utterances can be improved. In experiments, we study the effectiveness of this approach in a commercial dialog system across various domains and three languages.
%R 10.18653/v1/2020.coling-industry.3
%U https://aclanthology.org/2020.coling-industry.3/
%U https://doi.org/10.18653/v1/2020.coling-industry.3
%P 21-32
Markdown (Informal)
[Leveraging User Paraphrasing Behavior In Dialog Systems To Automatically Collect Annotations For Long-Tail Utterances](https://aclanthology.org/2020.coling-industry.3/) (Falke et al., COLING 2020)
ACL