{"id":"https://openalex.org/W4288125826","doi":"https://doi.org/10.26615/978-954-452-056-4_090","title":"Unsupervised Data Augmentation for Less-Resourced Languages with no Standardized Spelling","display_name":"Unsupervised Data Augmentation for Less-Resourced Languages with no Standardized Spelling","publication_year":2019,"publication_date":"2019-10-22","ids":{"openalex":"https://openalex.org/W4288125826","doi":"https://doi.org/10.26615/978-954-452-056-4_090"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.26615/978-954-452-056-4_090","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"preprint","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://doi.org/10.26615/978-954-452-056-4_090","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064622653","display_name":"Alice Millour","orcid":null},"institutions":[{"id":"https://openalex.org/I4389425349","display_name":"Sens, Texte, Informatique, Histoire","ror":"https://ror.org/02q5emw59","country_code":null,"type":"facility","lineage":["https://openalex.org/I39804081","https://openalex.org/I4389425349"]},{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"funder","lineage":["https://openalex.org/I39804081"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Alice Millour","raw_affiliation_strings":["Sorbonne Universit\u00e9 / STIH -EA 4509 28, rue Serpente, 75006 Paris, France"],"affiliations":[{"raw_affiliation_string":"Sorbonne Universit\u00e9 / STIH -EA 4509 28, rue Serpente, 75006 Paris, France","institution_ids":["https://openalex.org/I4389425349","https://openalex.org/I39804081"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074102447","display_name":"Kar\u00ebn Fort","orcid":"https://orcid.org/0000-0002-0723-8850"},"institutions":[{"id":"https://openalex.org/I4389425349","display_name":"Sens, Texte, Informatique, Histoire","ror":"https://ror.org/02q5emw59","country_code":null,"type":"facility","lineage":["https://openalex.org/I39804081","https://openalex.org/I4389425349"]},{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"funder","lineage":["https://openalex.org/I39804081"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Kar\u00ebn Fort","raw_affiliation_strings":["Sorbonne Universit\u00e9 / STIH -EA 4509 28, rue Serpente, 75006 Paris, France"],"affiliations":[{"raw_affiliation_string":"Sorbonne Universit\u00e9 / STIH -EA 4509 28, rue Serpente, 75006 Paris, France","institution_ids":["https://openalex.org/I4389425349","https://openalex.org/I39804081"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":3,"citation_normalized_percentile":{"value":0.57771,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":74,"max":76},"biblio":{"volume":null,"issue":null,"first_page":"776","last_page":"784"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9936,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spelling","display_name":"Spelling","score":0.9292992}],"concepts":[{"id":"https://openalex.org/C2777801307","wikidata":"https://www.wikidata.org/wiki/Q2088390","display_name":"Spelling","level":2,"score":0.9292992},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7527391},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6963233},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.63229126},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6093823},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.4899363},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4223285},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.26615/978-954-452-056-4_090","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://hal.archives-ouvertes.fr/hal-02280002","pdf_url":"https://hal.science/hal-02280002/document","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://hal.science/hal-02280002/file/Proceedings_of_Recent_Advances_in_Natural_Language_Processing.pdf","pdf_url":"https://hal.science/hal-02280002/file/Proceedings_of_Recent_Advances_in_Natural_Language_Processing.pdf","source":null,"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://hal.archives-ouvertes.fr/hal-02280002/file/Proceedings_of_Recent_Advances_in_Natural_Language_Processing.pdf","pdf_url":"https://hal.archives-ouvertes.fr/hal-02280002/file/Proceedings_of_Recent_Advances_in_Natural_Language_Processing.pdf","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.26615/978-954-452-056-4_090","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.67,"display_name":"Quality education","id":"https://metadata.un.org/sdg/4"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":14,"referenced_works":["https://openalex.org/W1553878378","https://openalex.org/W1602181184","https://openalex.org/W173527031","https://openalex.org/W1996771106","https://openalex.org/W2018662406","https://openalex.org/W2064043696","https://openalex.org/W2143995218","https://openalex.org/W2408555016","https://openalex.org/W2614278508","https://openalex.org/W2739967154","https://openalex.org/W2806054818","https://openalex.org/W3036397223","https://openalex.org/W4298345840","https://openalex.org/W4393639571"],"related_works":["https://openalex.org/W4308854837","https://openalex.org/W4298186509","https://openalex.org/W4226226396","https://openalex.org/W3153750606","https://openalex.org/W2556702969","https://openalex.org/W217221262","https://openalex.org/W2161008081","https://openalex.org/W2100947578","https://openalex.org/W2017877785","https://openalex.org/W1555832326"],"abstract_inverted_index":{"Non-standardized":[0],"languages":[1],"are":[2],"a":[3,28,31,41,45],"challenge":[4],"to":[5,13],"the":[6,14],"construction":[7],"of":[8,16,33,48],"representative":[9],"linguistic":[10],"resources":[11],"and":[12],"development":[15],"efficient":[17],"natural":[18],"language":[19],"processing":[20],"tools:":[21],"when":[22],"spelling":[23],"is":[24],"not":[25],"determined":[26],"by":[27],"consensual":[29],"norm,":[30],"multiplicity":[32],"alternative":[34],"written":[35],"forms":[36],"can":[37],"be":[38],"encountered":[39],"for":[40],"given":[42],"word,":[43],"inducing":[44],"large":[46],"proportion":[47],"out-of-vocabulary":[49],"words.":[50]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4288125826","counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2}],"updated_date":"2025-04-17T21:34:28.288781","created_date":"2022-07-28"}