{"id":"https://openalex.org/W4385484688","doi":"https://doi.org/10.1109/ijcnn54540.2023.10191839","title":"Handling Chinese OOV with a Combination of Radical-based Sub-words and Glyph Features","display_name":"Handling Chinese OOV with a Combination of Radical-based Sub-words and Glyph Features","publication_year":2023,"publication_date":"2023-06-18","ids":{"openalex":"https://openalex.org/W4385484688","doi":"https://doi.org/10.1109/ijcnn54540.2023.10191839"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn54540.2023.10191839","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047738189","display_name":"Yifan Xu","orcid":"https://orcid.org/0000-0002-6031-3717"},"institutions":[{"id":"https://openalex.org/I146399215","display_name":"University of Tsukuba","ror":"https://ror.org/02956yf07","country_code":"JP","type":"education","lineage":["https://openalex.org/I146399215"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yifan Xu","raw_affiliation_strings":["Graduate School of Comprehensive Human Sciences, University of Tsukuba"],"affiliations":[{"raw_affiliation_string":"Graduate School of Comprehensive Human Sciences, University of Tsukuba","institution_ids":["https://openalex.org/I146399215"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078878706","display_name":"Yohei Seki","orcid":"https://orcid.org/0000-0001-6388-1480"},"institutions":[{"id":"https://openalex.org/I146399215","display_name":"University of Tsukuba","ror":"https://ror.org/02956yf07","country_code":"JP","type":"education","lineage":["https://openalex.org/I146399215"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yohei Seki","raw_affiliation_strings":["Institute of Library, Information, and Media Science, University of Tsukuba"],"affiliations":[{"raw_affiliation_string":"Institute of Library, Information, and Media Science, University of Tsukuba","institution_ids":["https://openalex.org/I146399215"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":67},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9917,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/glyph","display_name":"Glyph (data visualization)","score":0.94829834},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.43249428}],"concepts":[{"id":"https://openalex.org/C142816647","wikidata":"https://www.wikidata.org/wiki/Q5573018","display_name":"Glyph (data visualization)","level":3,"score":0.94829834},{"id":"https://openalex.org/C83535845","wikidata":"https://www.wikidata.org/wiki/Q82772","display_name":"Kanji","level":3,"score":0.9408012},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7996783},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6994431},{"id":"https://openalex.org/C2781051154","wikidata":"https://www.wikidata.org/wiki/Q8201","display_name":"Chinese characters","level":2,"score":0.62555134},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5219799},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4708958},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.44199994},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4362552},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.43249428},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.17125925},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15048105},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.09344116},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn54540.2023.10191839","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality education","id":"https://metadata.un.org/sdg/4","score":0.75}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":25,"referenced_works":["https://openalex.org/W1614298861","https://openalex.org/W2100664567","https://openalex.org/W2101105183","https://openalex.org/W2118434577","https://openalex.org/W2170240176","https://openalex.org/W2788009253","https://openalex.org/W2895340641","https://openalex.org/W2896457183","https://openalex.org/W2912473624","https://openalex.org/W2944852028","https://openalex.org/W2952685631","https://openalex.org/W2955058313","https://openalex.org/W2962784628","https://openalex.org/W2963506925","https://openalex.org/W2963745526","https://openalex.org/W2964123686","https://openalex.org/W2981755851","https://openalex.org/W3106815411","https://openalex.org/W3167115472","https://openalex.org/W3174396451","https://openalex.org/W4294170691","https://openalex.org/W4313156423","https://openalex.org/W4385245566","https://openalex.org/W4394663828","https://openalex.org/W630532510"],"related_works":["https://openalex.org/W931439383","https://openalex.org/W4381616474","https://openalex.org/W4212776738","https://openalex.org/W418410444","https://openalex.org/W2384047089","https://openalex.org/W2368258406","https://openalex.org/W2320366352","https://openalex.org/W2250289928","https://openalex.org/W2149918773","https://openalex.org/W1928047647"],"abstract_inverted_index":{"In":[0,113],"natural":[1],"language":[2],"processing":[3],"tasks,":[4],"dictionaries":[5],"must":[6,11],"be":[7,12],"built":[8],"and":[9,47,63,92,110,124,137,168],"words":[10,46,50,173],"converted":[13,165],"into":[14,51],"their":[15,64,102],"corresponding":[16,135],"embeddings,":[17],"creating":[18],"problems":[19],"such":[20,31],"as":[21,32],"out":[22],"of":[23,59,74,82,108,119,122,161,171,185,192],"vocabulary":[24],"(OOV).":[25],"To":[26],"solve":[27],"this":[28,114],"problem,":[29],"algorithms":[30,76],"byte":[33],"pair":[34],"encoding":[35],"(BPE)":[36],"have":[37],"been":[38],"widely":[39],"used":[40],"to":[41,133,153,175],"generate":[42],"sub-words":[43],"from":[44],"known":[45],"decompose":[48],"OOV":[49,172],"existing":[52],"sub-words.":[53],"However,":[54],"Chinese":[55,123,166],"has":[56],"many":[57],"types":[58],"characters":[60,99,132,152,162,199],"called":[61],"Kanji,":[62],"distribution":[65,160],"in":[66,156,163],"the":[67,72,80,126,131,134,142,164,169,183,190,216],"text":[68],"is":[69],"sparse,":[70],"reducing":[71],"performance":[73,214],"BPE":[75,127],"that":[77,148,195,205],"rely":[78],"on":[79],"frequency":[81],"occurrence.":[83],"Compared":[84],"with":[85,141,207],"Kanji":[86,98,151,198],"characters,":[87],"radicals":[88,154],"are":[89,100,111],"less":[90],"varied":[91],"more":[93,150,158],"densely":[94],"distributed.":[95],"Moreover,":[96],"because":[97],"hieroglyphs,":[101],"glyphs":[103],"contain":[104],"a":[105,157,179],"rich":[106],"amount":[107],"information":[109],"unique.":[112],"paper,":[115],"we":[116],"take":[117],"advantage":[118],"these":[120],"characteristics":[121],"apply":[125],"algorithm":[128],"after":[129],"converting":[130,149],"radicals,":[136],"combining":[138,206],"sub-word":[139],"embedding":[140,187,194,208],"glyph":[143,210],"feature":[144],"vector.":[145],"Experiments":[146],"show":[147],"results":[155,202],"even":[159],"text,":[167],"number":[170],"continues":[174],"decrease.":[176],"Furthermore,":[177],"at":[178],"30%":[180],"conversion":[181,218],"ratio,":[182],"effect":[184,191],"radical-based":[186],"can":[188,212],"outperform":[189],"character-level":[193],"keeps":[196],"all":[197],"unchanged.":[200],"The":[201],"also":[203],"indicate":[204],"using":[209],"features":[211],"improve":[213],"despite":[215],"different":[217],"ratios.":[219]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4385484688","counts_by_year":[],"updated_date":"2025-01-02T21:24:15.991804","created_date":"2023-08-03"}