@inproceedings{liu-etal-2023-sweet,
title = "{SWEET} - Weakly Supervised Person Name Extraction for Fighting Human Trafficking",
author = "Liu, Javin and
Yu, Hao and
Sujaya, Vidya and
Nair, Pratheeksha and
Pelrine, Kellin and
Rabbany, Reihaneh",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.219/",
doi = "10.18653/v1/2023.findings-emnlp.219",
pages = "3355--3367",
abstract = "In this work, we propose a weak supervision pipeline SWEET: Supervise Weakly for Entity Extraction to fight Trafficking for extracting person names from noisy escort advertisements. Our method combines the simplicity of rule-matching (through antirules, i.e., negated rules) and the generalizability of large language models fine-tuned on benchmark, domain-specific and synthetic datasets, treating them as weak labels. One of the major challenges in this domain is limited labeled data. SWEET addresses this by obtaining multiple weak labels through labeling functions and effectively aggregating them. SWEET outperforms the previous supervised SOTA method for this task by 9{\%} F1 score on domain data and better generalizes to common benchmark datasets. Furthermore, we also release HTGEN, a synthetically generated dataset of escort advertisements (built using ChatGPT) to facilitate further research within the community."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-etal-2023-sweet">
<titleInfo>
<title>SWEET - Weakly Supervised Person Name Extraction for Fighting Human Trafficking</title>
</titleInfo>
<name type="personal">
<namePart type="given">Javin</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hao</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vidya</namePart>
<namePart type="family">Sujaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pratheeksha</namePart>
<namePart type="family">Nair</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kellin</namePart>
<namePart type="family">Pelrine</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reihaneh</namePart>
<namePart type="family">Rabbany</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work, we propose a weak supervision pipeline SWEET: Supervise Weakly for Entity Extraction to fight Trafficking for extracting person names from noisy escort advertisements. Our method combines the simplicity of rule-matching (through antirules, i.e., negated rules) and the generalizability of large language models fine-tuned on benchmark, domain-specific and synthetic datasets, treating them as weak labels. One of the major challenges in this domain is limited labeled data. SWEET addresses this by obtaining multiple weak labels through labeling functions and effectively aggregating them. SWEET outperforms the previous supervised SOTA method for this task by 9% F1 score on domain data and better generalizes to common benchmark datasets. Furthermore, we also release HTGEN, a synthetically generated dataset of escort advertisements (built using ChatGPT) to facilitate further research within the community.</abstract>
<identifier type="citekey">liu-etal-2023-sweet</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.219</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.219/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>3355</start>
<end>3367</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SWEET - Weakly Supervised Person Name Extraction for Fighting Human Trafficking
%A Liu, Javin
%A Yu, Hao
%A Sujaya, Vidya
%A Nair, Pratheeksha
%A Pelrine, Kellin
%A Rabbany, Reihaneh
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F liu-etal-2023-sweet
%X In this work, we propose a weak supervision pipeline SWEET: Supervise Weakly for Entity Extraction to fight Trafficking for extracting person names from noisy escort advertisements. Our method combines the simplicity of rule-matching (through antirules, i.e., negated rules) and the generalizability of large language models fine-tuned on benchmark, domain-specific and synthetic datasets, treating them as weak labels. One of the major challenges in this domain is limited labeled data. SWEET addresses this by obtaining multiple weak labels through labeling functions and effectively aggregating them. SWEET outperforms the previous supervised SOTA method for this task by 9% F1 score on domain data and better generalizes to common benchmark datasets. Furthermore, we also release HTGEN, a synthetically generated dataset of escort advertisements (built using ChatGPT) to facilitate further research within the community.
%R 10.18653/v1/2023.findings-emnlp.219
%U https://aclanthology.org/2023.findings-emnlp.219/
%U https://doi.org/10.18653/v1/2023.findings-emnlp.219
%P 3355-3367
Markdown (Informal)
[SWEET - Weakly Supervised Person Name Extraction for Fighting Human Trafficking](https://aclanthology.org/2023.findings-emnlp.219/) (Liu et al., Findings 2023)
ACL