@inproceedings{hahn-etal-2021-self-training,
title = "Self-Training using Rules of Grammar for Few-Shot {NLU}",
author = "Hahn, Joonghyuk and
Cheon, Hyunjoon and
Han, Kyuyeol and
Lee, Cheongjae and
Kim, Junseok and
Han, Yo-Sub",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.findings-emnlp.389",
doi = "10.18653/v1/2021.findings-emnlp.389",
pages = "4576--4581",
abstract = "We tackle the problem of self-training networks for NLU in low-resource environment{---}few labeled data and lots of unlabeled data. The effectiveness of self-training is a result of increasing the amount of training data while training. Yet it becomes less effective in low-resource settings due to unreliable labels predicted by the teacher model on unlabeled data. Rules of grammar, which describe the grammatical structure of data, have been used in NLU for better explainability. We propose to use rules of grammar in self-training as a more reliable pseudo-labeling mechanism, especially when there are few labeled data. We design an effective algorithm that constructs and expands rules of grammar without human involvement. Then we integrate the constructed rules as a pseudo-labeling mechanism into self-training. There are two possible scenarios regarding data distribution: it is unknown or known in prior to training. We empirically demonstrate that our approach substantially outperforms the state-of-the-art methods in three benchmark datasets for both scenarios.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hahn-etal-2021-self-training">
<titleInfo>
<title>Self-Training using Rules of Grammar for Few-Shot NLU</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joonghyuk</namePart>
<namePart type="family">Hahn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hyunjoon</namePart>
<namePart type="family">Cheon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyuyeol</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cheongjae</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junseok</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yo-Sub</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2021</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marie-Francine</namePart>
<namePart type="family">Moens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Specia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Scott</namePart>
<namePart type="given">Wen-tau</namePart>
<namePart type="family">Yih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We tackle the problem of self-training networks for NLU in a low-resource environment—few labeled data and large amounts of unlabeled data. Self-training is effective because it increases the amount of training data during training. Yet it becomes less effective in low-resource settings owing to the unreliable labels that the teacher model predicts for unlabeled data. Rules of grammar, which describe the grammatical structure of data, have been used in NLU for better explainability. We propose to use rules of grammar in self-training as a more reliable pseudo-labeling mechanism, especially when there are few labeled data. We design an effective algorithm that constructs and expands rules of grammar without human involvement, and then integrate the constructed rules into self-training as a pseudo-labeling mechanism. There are two possible scenarios regarding the data distribution: it is either unknown or known prior to training. We empirically demonstrate that our approach substantially outperforms state-of-the-art methods on three benchmark datasets in both scenarios.</abstract>
<identifier type="citekey">hahn-etal-2021-self-training</identifier>
<identifier type="doi">10.18653/v1/2021.findings-emnlp.389</identifier>
<location>
<url>https://aclanthology.org/2021.findings-emnlp.389</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>4576</start>
<end>4581</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Self-Training using Rules of Grammar for Few-Shot NLU
%A Hahn, Joonghyuk
%A Cheon, Hyunjoon
%A Han, Kyuyeol
%A Lee, Cheongjae
%A Kim, Junseok
%A Han, Yo-Sub
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Findings of the Association for Computational Linguistics: EMNLP 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F hahn-etal-2021-self-training
%X We tackle the problem of self-training networks for NLU in a low-resource environment—few labeled data and large amounts of unlabeled data. Self-training is effective because it increases the amount of training data during training. Yet it becomes less effective in low-resource settings owing to the unreliable labels that the teacher model predicts for unlabeled data. Rules of grammar, which describe the grammatical structure of data, have been used in NLU for better explainability. We propose to use rules of grammar in self-training as a more reliable pseudo-labeling mechanism, especially when there are few labeled data. We design an effective algorithm that constructs and expands rules of grammar without human involvement, and then integrate the constructed rules into self-training as a pseudo-labeling mechanism. There are two possible scenarios regarding the data distribution: it is either unknown or known prior to training. We empirically demonstrate that our approach substantially outperforms state-of-the-art methods on three benchmark datasets in both scenarios.
%R 10.18653/v1/2021.findings-emnlp.389
%U https://aclanthology.org/2021.findings-emnlp.389
%U https://doi.org/10.18653/v1/2021.findings-emnlp.389
%P 4576-4581
Markdown (Informal)
[Self-Training using Rules of Grammar for Few-Shot NLU](https://aclanthology.org/2021.findings-emnlp.389) (Hahn et al., Findings 2021)
ACL
- Joonghyuk Hahn, Hyunjoon Cheon, Kyuyeol Han, Cheongjae Lee, Junseok Kim, and Yo-Sub Han. 2021. Self-Training using Rules of Grammar for Few-Shot NLU. In Findings of the Association for Computational Linguistics: EMNLP 2021, pages 4576–4581, Punta Cana, Dominican Republic. Association for Computational Linguistics.
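For readers who want the gist of the method beyond the abstract, below is a minimal, self-contained sketch of rule-based pseudo-labeling inside a self-training loop, assuming regular expressions as a stand-in for the paper's automatically constructed rules of grammar. All names here (`RULES`, `pseudo_label`, `self_train`, `train_model`) are hypothetical illustrations, not the authors' code; the paper induces and expands its rules without human involvement, whereas the rules below are hard-coded purely to show the loop.

```python
import re
from typing import List, Optional, Tuple

# Hypothetical stand-in for the paper's "rules of grammar": each intent label
# is paired with patterns describing the grammatical shape of matching
# utterances. The paper constructs and expands such rules automatically from
# the few labeled examples; these are hard-coded for illustration only.
RULES = {
    "PlayMusic": [re.compile(r"\bplay\b.*\b(song|music|track)\b")],
    "GetWeather": [re.compile(r"\b(weather|forecast|temperature)\b")],
}

def pseudo_label(utterance: str) -> Optional[str]:
    """Label an utterance only when a rule matches; otherwise abstain.
    Abstaining is the point: rule matches are more reliable than the
    low-confidence teacher predictions that hurt few-shot self-training."""
    text = utterance.lower()
    for label, patterns in RULES.items():
        if any(p.search(text) for p in patterns):
            return label
    return None

def self_train(
    labeled: List[Tuple[str, str]],
    unlabeled: List[str],
    rounds: int = 3,
) -> Tuple[List[Tuple[str, str]], List[str]]:
    """Each round, move rule-matched utterances into the labeled pool and
    retrain the student on the grown pool; unmatched utterances carry over."""
    for _ in range(rounds):
        still_unlabeled = []
        for utt in unlabeled:
            label = pseudo_label(utt)
            if label is not None:
                labeled.append((utt, label))
            else:
                still_unlabeled.append(utt)
        unlabeled = still_unlabeled
        # model = train_model(labeled)  # placeholder for any NLU classifier
    return labeled, unlabeled

if __name__ == "__main__":
    seed = [("play a song by queen", "PlayMusic")]
    pool = ["what is the weather in seoul", "play some music", "book a table"]
    grown, leftover = self_train(seed, pool)
    print(grown)     # the two rule-matched utterances join the labeled pool
    print(leftover)  # ["book a table"]: no rule matched, so the loop abstains
```

The design choice mirrored here is that the labeler abstains rather than guesses, which is what keeps pseudo-labels reliable when labeled data are scarce; the unknown- versus known-distribution scenarios mentioned in the abstract would change only how the unlabeled pool is sampled, not this loop.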