@inproceedings{fernandes-etal-2022-ceia,
title = "{CEIA}-{NLP} at {CASE} 2022 Task 1: Protest News Detection for {P}ortuguese",
author = "Fernandes, Diogo and
Junior, Adalberto and
Marques, Gabriel and
Soares, Anderson and
Galvao Filho, Arlindo",
editor = {H{\"u}rriyeto{\u{g}}lu, Ali and
Tanev, Hristo and
Zavarella, Vanni and
Y{\"o}r{\"u}k, Erdem},
booktitle = "Proceedings of the 5th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Text (CASE)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.case-1.26",
doi = "10.18653/v1/2022.case-1.26",
pages = "184--188",
abstract = "This paper summarizes our work on the document classification subtask of Multilingual protest news detection of the CASE @ ACL-IJCNLP 2022 workshop. In this context, we investigate the performance of monolingual and multilingual transformer-based models in low data resources, taking Portuguese as an example and evaluating language models on document classification. Our approach became the winning solution in Portuguese document classification achieving 0.8007 F1 Score on Test set. The experimental results demonstrate that multilingual models achieve best results in scenarios with few dataset samples of specific language, because we can train models using datasets from other languages of the same task and domain.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fernandes-etal-2022-ceia">
<titleInfo>
<title>CEIA-NLP at CASE 2022 Task 1: Protest News Detection for Portuguese</title>
</titleInfo>
<name type="personal">
<namePart type="given">Diogo</namePart>
<namePart type="family">Fernandes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adalberto</namePart>
<namePart type="family">Junior</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriel</namePart>
<namePart type="family">Marques</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anderson</namePart>
<namePart type="family">Soares</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arlindo</namePart>
<namePart type="family">Galvao Filho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Text (CASE)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="family">Hürriyetoğlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hristo</namePart>
<namePart type="family">Tanev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vanni</namePart>
<namePart type="family">Zavarella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erdem</namePart>
<namePart type="family">Yörük</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper summarizes our work on the document classification subtask of Multilingual protest news detection of the CASE @ ACL-IJCNLP 2022 workshop. In this context, we investigate the performance of monolingual and multilingual transformer-based models in low data resources, taking Portuguese as an example and evaluating language models on document classification. Our approach became the winning solution in Portuguese document classification achieving 0.8007 F1 Score on Test set. The experimental results demonstrate that multilingual models achieve best results in scenarios with few dataset samples of specific language, because we can train models using datasets from other languages of the same task and domain.</abstract>
<identifier type="citekey">fernandes-etal-2022-ceia</identifier>
<identifier type="doi">10.18653/v1/2022.case-1.26</identifier>
<location>
<url>https://aclanthology.org/2022.case-1.26</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>184</start>
<end>188</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CEIA-NLP at CASE 2022 Task 1: Protest News Detection for Portuguese
%A Fernandes, Diogo
%A Junior, Adalberto
%A Marques, Gabriel
%A Soares, Anderson
%A Galvao Filho, Arlindo
%Y Hürriyetoğlu, Ali
%Y Tanev, Hristo
%Y Zavarella, Vanni
%Y Yörük, Erdem
%S Proceedings of the 5th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Text (CASE)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F fernandes-etal-2022-ceia
%X This paper summarizes our work on the document classification subtask of Multilingual protest news detection of the CASE @ ACL-IJCNLP 2022 workshop. In this context, we investigate the performance of monolingual and multilingual transformer-based models in low data resources, taking Portuguese as an example and evaluating language models on document classification. Our approach became the winning solution in Portuguese document classification achieving 0.8007 F1 Score on Test set. The experimental results demonstrate that multilingual models achieve best results in scenarios with few dataset samples of specific language, because we can train models using datasets from other languages of the same task and domain.
%R 10.18653/v1/2022.case-1.26
%U https://aclanthology.org/2022.case-1.26
%U https://doi.org/10.18653/v1/2022.case-1.26
%P 184-188
Markdown (Informal)
[CEIA-NLP at CASE 2022 Task 1: Protest News Detection for Portuguese](https://aclanthology.org/2022.case-1.26) (Fernandes et al., CASE 2022)
ACL
- Diogo Fernandes, Adalberto Junior, Gabriel Marques, Anderson Soares, and Arlindo Galvao Filho. 2022. CEIA-NLP at CASE 2022 Task 1: Protest News Detection for Portuguese. In Proceedings of the 5th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Text (CASE), pages 184–188, Abu Dhabi, United Arab Emirates (Hybrid). Association for Computational Linguistics.