{"id":"https://openalex.org/W2970295608","doi":"https://doi.org/10.18653/v1/w19-4321","title":"Investigating Sub-Word Embedding Strategies for the Morphologically Rich and Free Phrase-Order Hungarian","display_name":"Investigating Sub-Word Embedding Strategies for the Morphologically Rich and Free Phrase-Order Hungarian","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2970295608","doi":"https://doi.org/10.18653/v1/w19-4321","mag":"2970295608"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-4321","pdf_url":"https://www.aclweb.org/anthology/W19-4321.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://www.aclweb.org/anthology/W19-4321.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028099925","display_name":"B\u00e1lint D\u00f6br\u00f6ssy","orcid":null},"institutions":[{"id":"https://openalex.org/I29770179","display_name":"Budapest University of Technology and Economics","ror":"https://ror.org/02w42ss30","country_code":"HU","type":"education","lineage":["https://openalex.org/I29770179"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"B\u00e1lint D\u00f6br\u00f6ssy","raw_affiliation_strings":["Dept of Telecommunications and Media Info, Budapest University of Technology and Econ"],"affiliations":[{"raw_affiliation_string":"Dept of Telecommunications and Media Info, Budapest University of Technology and Econ","institution_ids":["https://openalex.org/I29770179"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050420939","display_name":"M\u00e1rton Makrai","orcid":null},"institutions":[{"id":"https://openalex.org/I2802350943","display_name":"Hungarian Research Centre for Linguistics","ror":"https://ror.org/005cqsz63","country_code":"HU","type":"facility","lineage":["https://openalex.org/I2802350943","https://openalex.org/I7597260"]},{"id":"https://openalex.org/I7597260","display_name":"Hungarian Academy of Sciences","ror":"https://ror.org/02ks8qq67","country_code":"HU","type":"government","lineage":["https://openalex.org/I7597260"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"M\u00e1rton Makrai","raw_affiliation_strings":["Research Institute for Linguistics of the Hungarian Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Research Institute for Linguistics of the Hungarian Academy of Sciences","institution_ids":["https://openalex.org/I2802350943","https://openalex.org/I7597260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014608650","display_name":"Bal\u00e1zs Tarj\u00e1n","orcid":"https://orcid.org/0000-0002-9676-3082"},"institutions":[{"id":"https://openalex.org/I29770179","display_name":"Budapest University of Technology and Economics","ror":"https://ror.org/02w42ss30","country_code":"HU","type":"education","lineage":["https://openalex.org/I29770179"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"Bal\u00e1zs Tarj\u00e1n","raw_affiliation_strings":["Dept of Telecommunications and Media Info, Budapest University of Technology and Econ"],"affiliations":[{"raw_affiliation_string":"Dept of Telecommunications and Media Info, Budapest University of Technology and Econ","institution_ids":["https://openalex.org/I29770179"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043901293","display_name":"Gy\u00f6rgy Szasz\u00e1k","orcid":null},"institutions":[{"id":"https://openalex.org/I29770179","display_name":"Budapest University of Technology and Economics","ror":"https://ror.org/02w42ss30","country_code":"HU","type":"education","lineage":["https://openalex.org/I29770179"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"Gy\u00f6rgy Szasz\u00e1k","raw_affiliation_strings":["Dept of Telecommunications and Media Info, Budapest University of Technology and Econ"],"affiliations":[{"raw_affiliation_string":"Dept of Telecommunications and Media Info, Budapest University of Technology and Econ","institution_ids":["https://openalex.org/I29770179"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.274,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":6,"citation_normalized_percentile":{"value":0.694943,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":81,"max":82},"biblio":{"volume":null,"issue":null,"first_page":"187","last_page":"193"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9908,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/agglutinative-language","display_name":"Agglutinative language","score":0.77881116},{"id":"https://openalex.org/keywords/lemmatisation","display_name":"Lemmatisation","score":0.75542986},{"id":"https://openalex.org/keywords/phrase","display_name":"Phrase","score":0.5751719},{"id":"https://openalex.org/keywords/word-order","display_name":"Word order","score":0.5282815},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.5112424}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.80729914},{"id":"https://openalex.org/C80875076","wikidata":"https://www.wikidata.org/wiki/Q171263","display_name":"Agglutinative language","level":3,"score":0.77881116},{"id":"https://openalex.org/C161831844","wikidata":"https://www.wikidata.org/wiki/Q2554325","display_name":"Lemmatisation","level":2,"score":0.75542986},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7381671},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6881969},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.65410155},{"id":"https://openalex.org/C2776224158","wikidata":"https://www.wikidata.org/wiki/Q187931","display_name":"Phrase","level":2,"score":0.5751719},{"id":"https://openalex.org/C70777604","wikidata":"https://www.wikidata.org/wiki/Q257885","display_name":"Word order","level":2,"score":0.5282815},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.519677},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.5112424},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.47916713},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.44163352},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.24741521},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.17604518},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15754372},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-4321","pdf_url":"https://www.aclweb.org/anthology/W19-4321.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"http://real.mtak.hu/101686/2/W194321.pdf","pdf_url":"http://real.mtak.hu/101686/2/W194321.pdf","source":{"id":"https://openalex.org/S4306400081","display_name":"Repository of the Academy's Library (Library of the Hungarian Academy of Sciences)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210140733","host_organization_name":"Library and Information Centre of the Hungarian Academy of Sciences","host_organization_lineage":["https://openalex.org/I4210140733"],"host_organization_lineage_names":["Library and Information Centre of the Hungarian Academy of Sciences"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-4321","pdf_url":"https://www.aclweb.org/anthology/W19-4321.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality education","score":0.74}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":29,"referenced_works":["https://openalex.org/W1505680913","https://openalex.org/W1614298861","https://openalex.org/W168564468","https://openalex.org/W1937075317","https://openalex.org/W2091812280","https://openalex.org/W2111741689","https://openalex.org/W2141599568","https://openalex.org/W2167419393","https://openalex.org/W2176796957","https://openalex.org/W2251012068","https://openalex.org/W2294970769","https://openalex.org/W2417763662","https://openalex.org/W2493916176","https://openalex.org/W2508815538","https://openalex.org/W2512498397","https://openalex.org/W2606342375","https://openalex.org/W2608216770","https://openalex.org/W2731822910","https://openalex.org/W2735949438","https://openalex.org/W2952332047","https://openalex.org/W2962772361","https://openalex.org/W2962784628","https://openalex.org/W2963548060","https://openalex.org/W2963626623","https://openalex.org/W2963711067","https://openalex.org/W2964005834","https://openalex.org/W309335912","https://openalex.org/W4285719527","https://openalex.org/W4294367149"],"related_works":["https://openalex.org/W4378419095","https://openalex.org/W4319049525","https://openalex.org/W4312163393","https://openalex.org/W3215822304","https://openalex.org/W2970295608","https://openalex.org/W2610856011","https://openalex.org/W2514401925","https://openalex.org/W2295293153","https://openalex.org/W2187070383","https://openalex.org/W1574718645"],"abstract_inverted_index":{"For":[0,32],"morphologically":[1],"rich":[2],"languages,":[3],"word":[4,16,26,40,44,95],"embeddings":[5,41,56],"provide":[6],"less":[7,24],"consistent":[8],"semantic":[9,37,91,125],"representations":[10],"due":[11],"to":[12,51,89,117,122],"higher":[13],"variance":[14],"in":[15,84,93,135],"forms.":[17],"Moreover,":[18],"these":[19],"languages":[20],"often":[21],"allow":[22],"for":[23],"constrained":[25],"order,":[27],"which":[28],"further":[29],"increases":[30],"variance.":[31],"the":[33,119],"highly":[34],"agglutinative":[35],"Hungarian,":[36],"accuracy":[38],"of":[39,99],"measured":[42],"on":[43],"analogy":[45],"tasks":[46],"drops":[47],"by":[48,78],"50-75%":[49],"compared":[50],"English.":[52],"We":[53],"observed":[54],"that":[55],"learn":[57],"morphosyntax":[58],"quite":[59],"well":[60],"instead.":[61],"Therefore,":[62],"we":[63],"explore":[64],"and":[65,81,103],"evaluate":[66],"several":[67],"sub-word":[68],"unit":[69],"based":[70,113],"embedding":[71,101],"strategies":[72],"\u2013":[73,88],"character":[74,129],"n-grams,":[75],"lemmatization":[76,114],"provided":[77],"an":[79],"NLP-pipeline,":[80],"segments":[82],"obtained":[83],"unsupervised":[85],"learning":[86],"(morfessor)":[87],"boost":[90],"consistency":[92],"Hungarian":[94],"vectors.":[96],"The":[97],"effect":[98],"changing":[100],"dimension":[102],"context":[104],"window":[105],"size":[106],"have":[107],"also":[108],"been":[109],"considered.":[110],"Morphological":[111],"analysis":[112],"was":[115,131],"found":[116,132],"be":[118],"best":[120],"strategy":[121],"improve":[123],"embeddings'":[124],"accuracy,":[126],"whereas":[127],"adding":[128],"n-grams":[130],"consistently":[133],"counterproductive":[134],"this":[136],"regard.":[137]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2970295608","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2020,"cited_by_count":1}],"updated_date":"2024-12-13T20:19:36.344317","created_date":"2019-09-05"}