@inproceedings{nakayama-etal-2024-search,
title = "Search Query Refinement for {J}apanese Named Entity Recognition in {E}-commerce Domain",
author = "Nakayama, Yuki and
Tatsushima, Ryutaro and
Mendieta, Erick and
Murakami, Koji and
Shinzato, Keiji",
editor = "Yang, Yi and
Davani, Aida and
Sil, Avi and
Kumar, Anoop",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.naacl-industry.39/",
doi = "10.18653/v1/2024.naacl-industry.39",
pages = "447--452",
abstract = "In the E-Commerce domain, search query refinement reformulates malformed queries into canonicalized forms by preprocessing operations such as {\textquotedblleft}term splitting{\textquotedblright} and {\textquotedblleft}term merging{\textquotedblright}. Unfortunately, most relevant research is rather limited to English. In particular, there is a severe lack of study on search query refinement for the Japanese language. Furthermore, no attempt has ever been made to apply refinement methods to data improvement for downstream NLP tasks in real-world scenarios. This paper presents a novel query refinement approach for the Japanese language. Experimental results show that our method achieves significant improvement by 3.5 points through comparison with BERT-CRF as a baseline. Further experiments are also conducted to measure beneficial impact of query refinement on named entity recognition (NER) as the downstream task. Evaluations indicate that the proposed query refinement method contributes to better data quality, leading to performance boost on E-Commerce specific NER tasks by 11.7 points, compared to search query data preprocessed by MeCab, a very popularly adopted Japanese tokenizer."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nakayama-etal-2024-search">
<titleInfo>
<title>Search Query Refinement for Japanese Named Entity Recognition in E-commerce Domain</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuki</namePart>
<namePart type="family">Nakayama</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryutaro</namePart>
<namePart type="family">Tatsushima</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erick</namePart>
<namePart type="family">Mendieta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koji</namePart>
<namePart type="family">Murakami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Keiji</namePart>
<namePart type="family">Shinzato</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aida</namePart>
<namePart type="family">Davani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Avi</namePart>
<namePart type="family">Sil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anoop</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the E-Commerce domain, search query refinement reformulates malformed queries into canonicalized forms by preprocessing operations such as “term splitting” and “term merging”. Unfortunately, most relevant research is rather limited to English. In particular, there is a severe lack of study on search query refinement for the Japanese language. Furthermore, no attempt has ever been made to apply refinement methods to data improvement for downstream NLP tasks in real-world scenarios. This paper presents a novel query refinement approach for the Japanese language. Experimental results show that our method achieves significant improvement by 3.5 points through comparison with BERT-CRF as a baseline. Further experiments are also conducted to measure beneficial impact of query refinement on named entity recognition (NER) as the downstream task. Evaluations indicate that the proposed query refinement method contributes to better data quality, leading to performance boost on E-Commerce specific NER tasks by 11.7 points, compared to search query data preprocessed by MeCab, a very popularly adopted Japanese tokenizer.</abstract>
<identifier type="citekey">nakayama-etal-2024-search</identifier>
<identifier type="doi">10.18653/v1/2024.naacl-industry.39</identifier>
<location>
<url>https://aclanthology.org/2024.naacl-industry.39/</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>447</start>
<end>452</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Search Query Refinement for Japanese Named Entity Recognition in E-commerce Domain
%A Nakayama, Yuki
%A Tatsushima, Ryutaro
%A Mendieta, Erick
%A Murakami, Koji
%A Shinzato, Keiji
%Y Yang, Yi
%Y Davani, Aida
%Y Sil, Avi
%Y Kumar, Anoop
%S Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F nakayama-etal-2024-search
%X In the E-Commerce domain, search query refinement reformulates malformed queries into canonicalized forms by preprocessing operations such as “term splitting” and “term merging”. Unfortunately, most relevant research is rather limited to English. In particular, there is a severe lack of study on search query refinement for the Japanese language. Furthermore, no attempt has ever been made to apply refinement methods to data improvement for downstream NLP tasks in real-world scenarios. This paper presents a novel query refinement approach for the Japanese language. Experimental results show that our method achieves significant improvement by 3.5 points through comparison with BERT-CRF as a baseline. Further experiments are also conducted to measure beneficial impact of query refinement on named entity recognition (NER) as the downstream task. Evaluations indicate that the proposed query refinement method contributes to better data quality, leading to performance boost on E-Commerce specific NER tasks by 11.7 points, compared to search query data preprocessed by MeCab, a very popularly adopted Japanese tokenizer.
%R 10.18653/v1/2024.naacl-industry.39
%U https://aclanthology.org/2024.naacl-industry.39/
%U https://doi.org/10.18653/v1/2024.naacl-industry.39
%P 447-452
Markdown (Informal)
[Search Query Refinement for Japanese Named Entity Recognition in E-commerce Domain](https://aclanthology.org/2024.naacl-industry.39/) (Nakayama et al., NAACL 2024)
ACL
- Yuki Nakayama, Ryutaro Tatsushima, Erick Mendieta, Koji Murakami, and Keiji Shinzato. 2024. Search Query Refinement for Japanese Named Entity Recognition in E-commerce Domain. In Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track), pages 447–452, Mexico City, Mexico. Association for Computational Linguistics.