@inproceedings{wojcik-etal-2023-domain,
title = "Domain-Agnostic Neural Architecture for Class Incremental Continual Learning in Document Processing Platform",
author = "W{\'o}jcik, Mateusz and
Ko{\'s}ciukiewicz, Witold and
Baran, Mateusz and
Kajdanowicz, Tomasz and
Gonczarek, Adam",
editor = "Sitaram, Sunayana and
Beigman Klebanov, Beata and
Williams, Jason D.",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-industry.51/",
doi = "10.18653/v1/2023.acl-industry.51",
pages = "527--537",
abstract = "Production deployments in complex systems require ML architectures to be highly efficient and usable against multiple tasks. Particularly demanding are classification problems in which data arrives in a streaming fashion and each class is presented separately. Recent methods with stochastic gradient learning have been shown to struggle in such setups or have limitations like memory buffers, and being restricted to specific domains that disable its usage in real-world scenarios. For this reason, we present a fully differentiable architecture based on the Mixture of Experts model, that enables the training of high-performance classifiers when examples from each class are presented separately. We conducted exhaustive experiments that proved its applicability in various domains and ability to learn online in production environments. The proposed technique achieves SOTA results without a memory buffer and clearly outperforms the reference methods."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wojcik-etal-2023-domain">
<titleInfo>
<title>Domain-Agnostic Neural Architecture for Class Incremental Continual Learning in Document Processing Platform</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mateusz</namePart>
<namePart type="family">Wójcik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Witold</namePart>
<namePart type="family">Kościukiewicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mateusz</namePart>
<namePart type="family">Baran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tomasz</namePart>
<namePart type="family">Kajdanowicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Gonczarek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sunayana</namePart>
<namePart type="family">Sitaram</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Beata</namePart>
<namePart type="family">Beigman Klebanov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jason</namePart>
<namePart type="given">D.</namePart>
<namePart type="family">Williams</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Production deployments in complex systems require ML architectures to be highly efficient and usable against multiple tasks. Particularly demanding are classification problems in which data arrives in a streaming fashion and each class is presented separately. Recent methods with stochastic gradient learning have been shown to struggle in such setups or have limitations like memory buffers, and being restricted to specific domains that disable its usage in real-world scenarios. For this reason, we present a fully differentiable architecture based on the Mixture of Experts model, that enables the training of high-performance classifiers when examples from each class are presented separately. We conducted exhaustive experiments that proved its applicability in various domains and ability to learn online in production environments. The proposed technique achieves SOTA results without a memory buffer and clearly outperforms the reference methods.</abstract>
<identifier type="citekey">wojcik-etal-2023-domain</identifier>
<identifier type="doi">10.18653/v1/2023.acl-industry.51</identifier>
<location>
<url>https://aclanthology.org/2023.acl-industry.51/</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>527</start>
<end>537</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Domain-Agnostic Neural Architecture for Class Incremental Continual Learning in Document Processing Platform
%A Wójcik, Mateusz
%A Kościukiewicz, Witold
%A Baran, Mateusz
%A Kajdanowicz, Tomasz
%A Gonczarek, Adam
%Y Sitaram, Sunayana
%Y Beigman Klebanov, Beata
%Y Williams, Jason D.
%S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F wojcik-etal-2023-domain
%X Production deployments in complex systems require ML architectures to be highly efficient and usable against multiple tasks. Particularly demanding are classification problems in which data arrives in a streaming fashion and each class is presented separately. Recent methods with stochastic gradient learning have been shown to struggle in such setups or have limitations like memory buffers, and being restricted to specific domains that disable its usage in real-world scenarios. For this reason, we present a fully differentiable architecture based on the Mixture of Experts model, that enables the training of high-performance classifiers when examples from each class are presented separately. We conducted exhaustive experiments that proved its applicability in various domains and ability to learn online in production environments. The proposed technique achieves SOTA results without a memory buffer and clearly outperforms the reference methods.
%R 10.18653/v1/2023.acl-industry.51
%U https://aclanthology.org/2023.acl-industry.51/
%U https://doi.org/10.18653/v1/2023.acl-industry.51
%P 527-537
Markdown (Informal)
[Domain-Agnostic Neural Architecture for Class Incremental Continual Learning in Document Processing Platform](https://aclanthology.org/2023.acl-industry.51/) (Wójcik et al., ACL 2023)
ACL