@inproceedings{trijakwanich-etal-2021-robust-fragment,
title = "Robust Fragment-Based Framework for Cross-lingual Sentence Retrieval",
author = "Trijakwanich, Nattapol and
Limkonchotiwat, Peerat and
Sarwar, Raheem and
Phatthiyaphaibun, Wannaphong and
Chuangsuwanich, Ekapol and
Nutanong, Sarana",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.findings-emnlp.80",
doi = "10.18653/v1/2021.findings-emnlp.80",
pages = "935--944",
abstract = "Cross-lingual Sentence Retrieval (CLSR) aims at retrieving parallel sentence pairs that are translations of each other from a multilingual set of comparable documents. The retrieved parallel sentence pairs can be used in other downstream NLP tasks such as machine translation and cross-lingual word sense disambiguation. We propose a CLSR framework called Robust Fragment-level Representation (RFR) CLSR framework to address Out-of-Domain (OOD) CLSR problems. In particular, we improve the sentence retrieval robustness by representing each sentence as a collection of fragments. In this way, we change the retrieval granularity from the sentence to the fragment level. We performed CLSR experiments based on three OOD datasets, four language pairs, and three base well-known sentence encoders: m-USE, LASER, and LaBSE. Experimental results show that RFR significantly improves the base encoders{'} performance for more than 85{\%} of the cases.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="trijakwanich-etal-2021-robust-fragment">
<titleInfo>
<title>Robust Fragment-Based Framework for Cross-lingual Sentence Retrieval</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nattapol</namePart>
<namePart type="family">Trijakwanich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peerat</namePart>
<namePart type="family">Limkonchotiwat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raheem</namePart>
<namePart type="family">Sarwar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wannaphong</namePart>
<namePart type="family">Phatthiyaphaibun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekapol</namePart>
<namePart type="family">Chuangsuwanich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarana</namePart>
<namePart type="family">Nutanong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2021</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marie-Francine</namePart>
<namePart type="family">Moens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Specia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Scott</namePart>
<namePart type="given">Wen-tau</namePart>
<namePart type="family">Yih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Cross-lingual Sentence Retrieval (CLSR) aims at retrieving parallel sentence pairs that are translations of each other from a multilingual set of comparable documents. The retrieved parallel sentence pairs can be used in other downstream NLP tasks such as machine translation and cross-lingual word sense disambiguation. We propose a CLSR framework called Robust Fragment-level Representation (RFR) CLSR framework to address Out-of-Domain (OOD) CLSR problems. In particular, we improve the sentence retrieval robustness by representing each sentence as a collection of fragments. In this way, we change the retrieval granularity from the sentence to the fragment level. We performed CLSR experiments based on three OOD datasets, four language pairs, and three base well-known sentence encoders: m-USE, LASER, and LaBSE. Experimental results show that RFR significantly improves the base encoders’ performance for more than 85% of the cases.</abstract>
<identifier type="citekey">trijakwanich-etal-2021-robust-fragment</identifier>
<identifier type="doi">10.18653/v1/2021.findings-emnlp.80</identifier>
<location>
<url>https://aclanthology.org/2021.findings-emnlp.80</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>935</start>
<end>944</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Robust Fragment-Based Framework for Cross-lingual Sentence Retrieval
%A Trijakwanich, Nattapol
%A Limkonchotiwat, Peerat
%A Sarwar, Raheem
%A Phatthiyaphaibun, Wannaphong
%A Chuangsuwanich, Ekapol
%A Nutanong, Sarana
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Findings of the Association for Computational Linguistics: EMNLP 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F trijakwanich-etal-2021-robust-fragment
%X Cross-lingual Sentence Retrieval (CLSR) aims at retrieving parallel sentence pairs that are translations of each other from a multilingual set of comparable documents. The retrieved parallel sentence pairs can be used in other downstream NLP tasks such as machine translation and cross-lingual word sense disambiguation. We propose a CLSR framework called Robust Fragment-level Representation (RFR) CLSR framework to address Out-of-Domain (OOD) CLSR problems. In particular, we improve the sentence retrieval robustness by representing each sentence as a collection of fragments. In this way, we change the retrieval granularity from the sentence to the fragment level. We performed CLSR experiments based on three OOD datasets, four language pairs, and three base well-known sentence encoders: m-USE, LASER, and LaBSE. Experimental results show that RFR significantly improves the base encoders’ performance for more than 85% of the cases.
%R 10.18653/v1/2021.findings-emnlp.80
%U https://aclanthology.org/2021.findings-emnlp.80
%U https://doi.org/10.18653/v1/2021.findings-emnlp.80
%P 935-944
Markdown (Informal)
[Robust Fragment-Based Framework for Cross-lingual Sentence Retrieval](https://aclanthology.org/2021.findings-emnlp.80) (Trijakwanich et al., Findings 2021)
ACL
- Nattapol Trijakwanich, Peerat Limkonchotiwat, Raheem Sarwar, Wannaphong Phatthiyaphaibun, Ekapol Chuangsuwanich, and Sarana Nutanong. 2021. Robust Fragment-Based Framework for Cross-lingual Sentence Retrieval. In Findings of the Association for Computational Linguistics: EMNLP 2021, pages 935–944, Punta Cana, Dominican Republic. Association for Computational Linguistics.