{"id":"https://openalex.org/W4306311630","doi":"https://doi.org/10.48550/arxiv.2210.06990","title":"Exploring Segmentation Approaches for Neural Machine Translation of Code-Switched Egyptian Arabic-English Text","display_name":"Exploring Segmentation Approaches for Neural Machine Translation of Code-Switched Egyptian Arabic-English Text","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4306311630","doi":"https://doi.org/10.48550/arxiv.2210.06990"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2210.06990","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2210.06990","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064798728","display_name":"Marwa Gaser","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gaser, Marwa","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048083543","display_name":"Manuel Mager","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mager, Manuel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004299578","display_name":"Injy Hamed","orcid":"https://orcid.org/0000-0002-9171-9461"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hamed, Injy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084517393","display_name":"Nizar Habash","orcid":"https://orcid.org/0000-0002-1831-3457"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Habash, Nizar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080036358","display_name":"Slim Abdennadher","orcid":"https://orcid.org/0000-0003-1817-1855"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abdennadher, Slim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5020700841","display_name":"Ngoc Thang Vu","orcid":"https://orcid.org/0000-0001-7893-9147"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vu, Ngoc Thang","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":59},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9812,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.979,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6899451}],"concepts":[{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.88456446},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6963531},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6899451},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.5708956},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.56651044},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.55767804},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.5325716},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.51963276},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.50791126},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.48126715},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.45943257},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35042727},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33912367},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.15807545},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07104996},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.068858236},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2210.06990","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2210.06990","pdf_url":"http://arxiv.org/pdf/2210.06990","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2210.06990","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2210.06990","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4362495644","https://openalex.org/W4249048193","https://openalex.org/W3184247917","https://openalex.org/W3172144307","https://openalex.org/W3151736118","https://openalex.org/W3011059803","https://openalex.org/W3000360420","https://openalex.org/W2883671469","https://openalex.org/W2728761353","https://openalex.org/W1931440630"],"abstract_inverted_index":{"Data":[0],"sparsity":[1,38],"is":[2,13,130,148],"one":[3],"of":[4,19,26,58,87,98,122,143,169],"the":[5,17,24,56,107,120,123,134,152,167],"main":[6],"challenges":[7],"posed":[8],"by":[9],"code-switching":[10],"(CS),":[11],"which":[12],"further":[14],"exacerbated":[15],"in":[16,35,39,109,114],"case":[18],"morphologically":[20],"rich":[21],"languages.":[22],"For":[23,137,154],"task":[25],"machine":[27],"translation":[28],"(MT),":[29],"morphological":[30],"segmentation":[31,60,69,110,124],"has":[32,44],"proven":[33],"successful":[34],"alleviating":[36],"data":[37,91,135],"monolingual":[40],"contexts;":[41],"however,":[42],"it":[43],"not":[45,162],"been":[46],"investigated":[47],"for":[48,128],"CS":[49],"settings.":[50],"In":[51],"this":[52],"paper,":[53],"we":[54,117],"study":[55],"effectiveness":[57],"different":[59,96],"approaches":[61],"on":[62,73,133],"MT":[63,74,129],"performance,":[64],"covering":[65],"morphology-based":[66,146],"and":[67,93,145],"frequency-based":[68,170],"techniques.":[70],"We":[71,80],"experiment":[72],"from":[75],"code-switched":[76],"Arabic-English":[77],"to":[78,126,150],"English.":[79],"provide":[81],"detailed":[82],"analysis,":[83],"examining":[84],"a":[85,141,159],"variety":[86],"conditions,":[88],"such":[89,158],"as":[90],"size":[92],"sentences":[94],"with":[95],"degrees":[97],"CS.":[99],"Empirical":[100],"results":[101],"show":[102],"that":[103,119],"morphology-aware":[104],"segmenters":[105],"perform":[106,151],"best":[108],"tasks":[111],"but":[112],"under-perform":[113],"MT.":[115],"Nevertheless,":[116],"find":[118],"choice":[121],"setup":[125],"use":[127,168],"highly":[131],"dependent":[132],"size.":[136],"extreme":[138],"low-resource":[139],"scenarios,":[140],"combination":[142,160],"frequency":[144],"segmentations":[147],"shown":[149],"best.":[153],"more":[155],"resourced":[156],"settings,":[157],"does":[161],"bring":[163],"significant":[164],"improvements":[165],"over":[166],"segmentation.":[171]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4306311630","counts_by_year":[],"updated_date":"2025-03-01T16:35:47.421343","created_date":"2022-10-15"}