% HI-CMLM (INLG 2021) -- conference paper record exported from the ACL Anthology.
% NOTE(review): "Jiaxin, Guo" and "Chang, Su" may have family/given names
% transposed (family-first input order) -- confirm with the authors before changing.
@inproceedings{wang-etal-2021-hi,
    title = "{HI}-{CMLM}: Improve {CMLM} with Hybrid Decoder Input",
    author = "Wang, Minghan and
      Jiaxin, Guo and
      Wang, Yuxia and
      Chen, Yimeng and
      Chang, Su and
      Wei, Daimeng and
      Zhang, Min and
      Tao, Shimin and
      Yang, Hao",
    editor = "Belz, Anya and
      Fan, Angela and
      Reiter, Ehud and
      Sripada, Yaji",
    booktitle = "Proceedings of the 14th International Conference on Natural Language Generation",
    month = aug,
    year = "2021",
    address = "Aberdeen, Scotland, UK",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.inlg-1.16",
    doi = "10.18653/v1/2021.inlg-1.16",
    pages = "167--171",
    abstract = "Mask-predict CMLM (Ghazvininejad et al., 2019) has achieved stunning performance among non-autoregressive NMT models, but we find that the mechanism of predicting all of the target words only depending on the hidden state of [MASK] is not effective and efficient in initial iterations of refinement, resulting in ungrammatical repetitions and slow convergence. In this work, we mitigate this problem by combining copied source with embeddings of [MASK] in decoder. Notably, it{'}s not a straightforward copying that is shown to be useless, but a novel heuristic hybrid strategy {---} fence-mask. Experimental results show that it gains consistent boosts on both WMT14 En{\textless}-{\textgreater}De and WMT16 En{\textless}-{\textgreater}Ro corpus by 0.5 BLEU on average, and 1 BLEU for less-informative short sentences. This reveals that incorporating additional information by proper strategies is beneficial to improve CMLM, particularly translation quality of short texts and speeding up early-stage convergence.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2021-hi">
<titleInfo>
<title>HI-CMLM: Improve CMLM with Hybrid Decoder Input</title>
</titleInfo>
<name type="personal">
<namePart type="given">Minghan</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guo</namePart>
<namePart type="family">Jiaxin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuxia</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yimeng</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Su</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daimeng</namePart>
<namePart type="family">Wei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shimin</namePart>
<namePart type="family">Tao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hao</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Conference on Natural Language Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anya</namePart>
<namePart type="family">Belz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angela</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ehud</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yaji</namePart>
<namePart type="family">Sripada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Aberdeen, Scotland, UK</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Mask-predict CMLM (Ghazvininejad et al., 2019) has achieved stunning performance among non-autoregressive NMT models, but we find that the mechanism of predicting all of the target words only depending on the hidden state of [MASK] is not effective and efficient in initial iterations of refinement, resulting in ungrammatical repetitions and slow convergence. In this work, we mitigate this problem by combining copied source with embeddings of [MASK] in decoder. Notably, it’s not a straightforward copying that is shown to be useless, but a novel heuristic hybrid strategy — fence-mask. Experimental results show that it gains consistent boosts on both WMT14 En&lt;-&gt;De and WMT16 En&lt;-&gt;Ro corpus by 0.5 BLEU on average, and 1 BLEU for less-informative short sentences. This reveals that incorporating additional information by proper strategies is beneficial to improve CMLM, particularly translation quality of short texts and speeding up early-stage convergence.</abstract>
<identifier type="citekey">wang-etal-2021-hi</identifier>
<identifier type="doi">10.18653/v1/2021.inlg-1.16</identifier>
<location>
<url>https://aclanthology.org/2021.inlg-1.16</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>167</start>
<end>171</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HI-CMLM: Improve CMLM with Hybrid Decoder Input
%A Wang, Minghan
%A Jiaxin, Guo
%A Wang, Yuxia
%A Chen, Yimeng
%A Chang, Su
%A Wei, Daimeng
%A Zhang, Min
%A Tao, Shimin
%A Yang, Hao
%Y Belz, Anya
%Y Fan, Angela
%Y Reiter, Ehud
%Y Sripada, Yaji
%S Proceedings of the 14th International Conference on Natural Language Generation
%D 2021
%8 August
%I Association for Computational Linguistics
%C Aberdeen, Scotland, UK
%F wang-etal-2021-hi
%X Mask-predict CMLM (Ghazvininejad et al., 2019) has achieved stunning performance among non-autoregressive NMT models, but we find that the mechanism of predicting all of the target words only depending on the hidden state of [MASK] is not effective and efficient in initial iterations of refinement, resulting in ungrammatical repetitions and slow convergence. In this work, we mitigate this problem by combining copied source with embeddings of [MASK] in decoder. Notably, it’s not a straightforward copying that is shown to be useless, but a novel heuristic hybrid strategy — fence-mask. Experimental results show that it gains consistent boosts on both WMT14 En<->De and WMT16 En<->Ro corpus by 0.5 BLEU on average, and 1 BLEU for less-informative short sentences. This reveals that incorporating additional information by proper strategies is beneficial to improve CMLM, particularly translation quality of short texts and speeding up early-stage convergence.
%R 10.18653/v1/2021.inlg-1.16
%U https://aclanthology.org/2021.inlg-1.16
%U https://doi.org/10.18653/v1/2021.inlg-1.16
%P 167-171
Markdown (Informal)
[HI-CMLM: Improve CMLM with Hybrid Decoder Input](https://aclanthology.org/2021.inlg-1.16) (Wang et al., INLG 2021)
ACL
- Minghan Wang, Guo Jiaxin, Yuxia Wang, Yimeng Chen, Su Chang, Daimeng Wei, Min Zhang, Shimin Tao, and Hao Yang. 2021. HI-CMLM: Improve CMLM with Hybrid Decoder Input. In Proceedings of the 14th International Conference on Natural Language Generation, pages 167–171, Aberdeen, Scotland, UK. Association for Computational Linguistics.