{"id":"https://openalex.org/W3156886841","doi":"https://doi.org/10.18653/v1/2021.eacl-main.194","title":"Subword Pooling Makes a Difference","display_name":"Subword Pooling Makes a Difference","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3156886841","doi":"https://doi.org/10.18653/v1/2021.eacl-main.194","mag":"3156886841"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2021.eacl-main.194","pdf_url":"https://aclanthology.org/2021.eacl-main.194.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://aclanthology.org/2021.eacl-main.194.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018650131","display_name":"Judit \u00c1cs","orcid":"https://orcid.org/0000-0003-4918-4333"},"institutions":[{"id":"https://openalex.org/I4210117195","display_name":"Institute for Computer Science and Control","ror":"https://ror.org/0249v7n71","country_code":"HU","type":"facility","lineage":["https://openalex.org/I4210117195","https://openalex.org/I7597260"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"Judit \u00c1cs","raw_affiliation_strings":["Institute for Computer Science and Control"],"affiliations":[{"raw_affiliation_string":"Institute for Computer Science and Control","institution_ids":["https://openalex.org/I4210117195"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029159426","display_name":"\u00c1kos K\u00e1d\u00e1r","orcid":null},"institutions":[{"id":"https://openalex.org/I29770179","display_name":"Budapest University of Technology and Economics","ror":"https://ror.org/02w42ss30","country_code":"HU","type":"education","lineage":["https://openalex.org/I29770179"]}],"countries":["HU"],"is_corresponding":true,"raw_author_name":"\u00c1kos K\u00e1d\u00e1r","raw_affiliation_strings":["Budapest University of Technology and Economics"],"affiliations":[{"raw_affiliation_string":"Budapest University of Technology and Economics","institution_ids":["https://openalex.org/I29770179"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065413313","display_name":"Andr\u00e1s Kornai","orcid":"https://orcid.org/0000-0001-6078-6840"},"institutions":[{"id":"https://openalex.org/I29770179","display_name":"Budapest University of Technology and Economics","ror":"https://ror.org/02w42ss30","country_code":"HU","type":"education","lineage":["https://openalex.org/I29770179"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"Andras Kornai","raw_affiliation_strings":["Budapest University of Technology and Economics"],"affiliations":[{"raw_affiliation_string":"Budapest University of Technology and Economics","institution_ids":["https://openalex.org/I29770179"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5029159426"],"corresponding_institution_ids":["https://openalex.org/I29770179"],"apc_list":null,"apc_paid":null,"fwci":1.932,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":19,"citation_normalized_percentile":{"value":0.999914,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":93,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"2284","last_page":"2295"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9979,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.8036569},{"id":"https://openalex.org/keywords/lexical-analysis","display_name":"Lexical analysis","score":0.71150804},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4933118}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.85476476},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.8036569},{"id":"https://openalex.org/C176982825","wikidata":"https://www.wikidata.org/wiki/Q835922","display_name":"Lexical analysis","level":2,"score":0.71150804},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.69444454},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6453548},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.57123226},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.5096803},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4933118},{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.44069374},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.14602542},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.13584104},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2021.eacl-main.194","pdf_url":"https://aclanthology.org/2021.eacl-main.194.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"http://eprints.sztaki.hu/10097/","pdf_url":"https://eprints.sztaki.hu/10097/1/Acs_2284_31994640_ny.pdf","source":{"id":"https://openalex.org/S4306401907","display_name":"SZTAKI Publication Repository (Hungarian Academy of Sciences)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I7597260","host_organization_name":"Hungarian Academy of Sciences","host_organization_lineage":["https://openalex.org/I7597260"],"host_organization_lineage_names":["Hungarian Academy of Sciences"],"type":"repository"},"license":"mit","license_id":"https://openalex.org/licenses/mit","version":"acceptedVersion","is_accepted":true,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2102.10864","pdf_url":"http://arxiv.org/pdf/2102.10864","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2021.eacl-main.194","pdf_url":"https://aclanthology.org/2021.eacl-main.194.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Quality education","id":"https://metadata.un.org/sdg/4","score":0.85}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":19,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1623072288","https://openalex.org/W2064675550","https://openalex.org/W2121879602","https://openalex.org/W2124973918","https://openalex.org/W2742113707","https://openalex.org/W2915774325","https://openalex.org/W2946359678","https://openalex.org/W2951562155","https://openalex.org/W2963341956","https://openalex.org/W2963643701","https://openalex.org/W2964121744","https://openalex.org/W2964301648","https://openalex.org/W2970529259","https://openalex.org/W2983040767","https://openalex.org/W2989539713","https://openalex.org/W3032816972","https://openalex.org/W3035164270","https://openalex.org/W3035390927"],"related_works":["https://openalex.org/W803346624","https://openalex.org/W4390975304","https://openalex.org/W4287804464","https://openalex.org/W3211292372","https://openalex.org/W3103989898","https://openalex.org/W3022252430","https://openalex.org/W2953234277","https://openalex.org/W2900413183","https://openalex.org/W2626256601","https://openalex.org/W147410782"],"abstract_inverted_index":{"Contextual":[0],"word-representations":[1],"became":[2],"a":[3,29,38,123],"standard":[4],"in":[5,65,73,145],"modern":[6],"natural":[7],"language":[8],"processing":[9],"systems.":[10],"These":[11],"models":[12],"use":[13,122],"subword":[14,50],"tokenization":[15],"to":[16,37,121],"handle":[17],"large":[18],"vocabularies":[19],"and":[20,63,79,95,116,136,155],"unknown":[21],"words.":[22],"Word-level":[23],"usage":[24],"of":[25,31,49,111],"such":[26],"systems":[27],"requires":[28],"way":[30],"pooling":[32,51],"multiple":[33],"subwords":[34],"that":[35,139],"correspond":[36],"single":[39],"word.":[40],"In":[41],"this":[42],"paper":[43],"we":[44,137],"investigate":[45],"how":[46],"the":[47,53,84,88,92,96,105,117,127,156],"choice":[48,119],"affects":[52],"downstream":[54],"performance":[55],"on":[56],"three":[57],"tasks:":[58],"morphological":[59,82],"probing,":[60],"POS":[61,108],"tagging":[62,109],"NER,":[64],"9":[66,147],"typologically":[67],"diverse":[68],"languages.":[69,148],"We":[70,149],"compare":[71],"these":[72,112],"two":[74],"massively":[75],"multilingual":[76],"models,":[77],"mBERT":[78,140],"XLM-RoBERTa.":[80],"For":[81,107],"tasks,":[83],"widely":[85],"used":[86],"'choose":[87],"first":[89],"subword'":[90],"is":[91,120,141],"worst":[93],"strategy":[94,131],"best":[97,118,133],"results":[98],"are":[99],"obtained":[100],"by":[101],"using":[102],"attention":[103],"over":[104,126],"subwords.":[106,128],"both":[110],"strategies":[113],"perform":[114],"poorly":[115],"small":[124],"LSTM":[125],"The":[129],"same":[130],"works":[132],"for":[134],"NER":[135],"show":[138],"better":[142],"than":[143],"XLM-RoBERTa":[144],"all":[146,152],"publicly":[150],"release":[151],"code,":[153],"data":[154],"full":[157],"result":[158],"tables":[159],"at":[160],"https://github.com/juditacs/subword-choice":[161],".":[162]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3156886841","counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":4}],"updated_date":"2025-01-07T08:22:21.241116","created_date":"2021-04-26"}