@inproceedings{bartl-leavy-2022-inferring,
title = "Inferring Gender: A Scalable Methodology for Gender Detection with Online Lexical Databases",
author = "Bartl, Marion and
Leavy, Susan",
editor = "Chakravarthi, Bharathi Raja and
Bharathi, B and
McCrae, John P and
Zarrouk, Manel and
Bali, Kalika and
Buitelaar, Paul",
booktitle = "Proceedings of the Second Workshop on Language Technology for Equality, Diversity and Inclusion",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.ltedi-1.7/",
doi = "10.18653/v1/2022.ltedi-1.7",
pages = "47--58",
abstract = "This paper presents a new method for automatic detection of gendered terms in large-scale language datasets. Currently, the evaluation of gender bias in natural language processing relies on the use of manually compiled lexicons of gendered expressions, such as pronouns and words that imply gender. However, manual compilation of lists with lexical gender can lead to static information if lists are not periodically updated and often involve value judgements by individual annotators and researchers. Moreover, terms not included in the lexicons fall out of the range of analysis. To address these issues, we devised a scalable dictionary-based method to automatically detect lexical gender that can provide a dynamic, up-to-date analysis with high coverage. Our approach reaches over 80{\%} accuracy in determining the lexical gender of words retrieved randomly from a Wikipedia sample and when testing on a list of gendered words used in previous research."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bartl-leavy-2022-inferring">
<titleInfo>
<title>Inferring Gender: A Scalable Methodology for Gender Detection with Online Lexical Databases</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marion</namePart>
<namePart type="family">Bartl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Susan</namePart>
<namePart type="family">Leavy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Language Technology for Equality, Diversity and Inclusion</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">B</namePart>
<namePart type="family">Bharathi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="given">P</namePart>
<namePart type="family">McCrae</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manel</namePart>
<namePart type="family">Zarrouk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Buitelaar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents a new method for automatic detection of gendered terms in large-scale language datasets. Currently, the evaluation of gender bias in natural language processing relies on the use of manually compiled lexicons of gendered expressions, such as pronouns and words that imply gender. However, manual compilation of lists with lexical gender can lead to static information if lists are not periodically updated and often involve value judgements by individual annotators and researchers. Moreover, terms not included in the lexicons fall out of the range of analysis. To address these issues, we devised a scalable dictionary-based method to automatically detect lexical gender that can provide a dynamic, up-to-date analysis with high coverage. Our approach reaches over 80% accuracy in determining the lexical gender of words retrieved randomly from a Wikipedia sample and when testing on a list of gendered words used in previous research.</abstract>
<identifier type="citekey">bartl-leavy-2022-inferring</identifier>
<identifier type="doi">10.18653/v1/2022.ltedi-1.7</identifier>
<location>
<url>https://aclanthology.org/2022.ltedi-1.7/</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>47</start>
<end>58</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Inferring Gender: A Scalable Methodology for Gender Detection with Online Lexical Databases
%A Bartl, Marion
%A Leavy, Susan
%Y Chakravarthi, Bharathi Raja
%Y Bharathi, B.
%Y McCrae, John P.
%Y Zarrouk, Manel
%Y Bali, Kalika
%Y Buitelaar, Paul
%S Proceedings of the Second Workshop on Language Technology for Equality, Diversity and Inclusion
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F bartl-leavy-2022-inferring
%X This paper presents a new method for automatic detection of gendered terms in large-scale language datasets. Currently, the evaluation of gender bias in natural language processing relies on the use of manually compiled lexicons of gendered expressions, such as pronouns and words that imply gender. However, manual compilation of lists with lexical gender can lead to static information if lists are not periodically updated and often involve value judgements by individual annotators and researchers. Moreover, terms not included in the lexicons fall out of the range of analysis. To address these issues, we devised a scalable dictionary-based method to automatically detect lexical gender that can provide a dynamic, up-to-date analysis with high coverage. Our approach reaches over 80% accuracy in determining the lexical gender of words retrieved randomly from a Wikipedia sample and when testing on a list of gendered words used in previous research.
%R 10.18653/v1/2022.ltedi-1.7
%U https://aclanthology.org/2022.ltedi-1.7/
%U https://doi.org/10.18653/v1/2022.ltedi-1.7
%P 47-58
Markdown (Informal)
[Inferring Gender: A Scalable Methodology for Gender Detection with Online Lexical Databases](https://aclanthology.org/2022.ltedi-1.7/) (Bartl & Leavy, LTEDI 2022)
ACL