{"id":"https://openalex.org/W4385572030","doi":"https://doi.org/10.18653/v1/2023.findings-acl.235","title":"Not Enough Data to Pre-train Your Language Model? MT to the Rescue!","display_name":"Not Enough Data to Pre-train Your Language Model? MT to the Rescue!","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4385572030","doi":"https://doi.org/10.18653/v1/2023.findings-acl.235"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2023.findings-acl.235","pdf_url":"https://aclanthology.org/2023.findings-acl.235.pdf","source":{"id":"https://openalex.org/S4363605144","display_name":"Findings of the Association for Computational Linguistics: ACL 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://aclanthology.org/2023.findings-acl.235.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024968551","display_name":"Gorka Urbizu","orcid":null},"institutions":[{"id":"https://openalex.org/I169108374","display_name":"University of the Basque Country","ror":"https://ror.org/000xsnr85","country_code":"ES","type":"education","lineage":["https://openalex.org/I169108374"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Gorka Urbizu","raw_affiliation_strings":[" University of the Basque Country"],"affiliations":[{"raw_affiliation_string":" University of the Basque Country","institution_ids":["https://openalex.org/I169108374"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044684628","display_name":"I\u00f1aki San Vicente","orcid":"https://orcid.org/0000-0003-1765-0555"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"I\u00f1aki San Vicente","raw_affiliation_strings":["Orai NLP Technologies"],"affiliations":[{"raw_affiliation_string":"Orai NLP Technologies","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067981207","display_name":"Xabier Saralegi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xabier Saralegi","raw_affiliation_strings":["Orai NLP Technologies"],"affiliations":[{"raw_affiliation_string":"Orai NLP Technologies","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035960421","display_name":"Ander Corral","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ander Corral","raw_affiliation_strings":["Orai NLP Technologies"],"affiliations":[{"raw_affiliation_string":"Orai NLP Technologies","institution_ids":[]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.412,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":2,"citation_normalized_percentile":{"value":0.558059,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":78,"max":84},"biblio":{"volume":null,"issue":null,"first_page":"3826","last_page":"3836"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9966,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9966,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9905,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9506,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.6031345}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8007853},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6117954},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.6031345},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.58054227},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.57779247},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5740336},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4931247},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41774857},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.12137285},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2023.findings-acl.235","pdf_url":"https://aclanthology.org/2023.findings-acl.235.pdf","source":{"id":"https://openalex.org/S4363605144","display_name":"Findings of the Association for Computational Linguistics: ACL 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2023.findings-acl.235","pdf_url":"https://aclanthology.org/2023.findings-acl.235.pdf","source":{"id":"https://openalex.org/S4363605144","display_name":"Findings of the Association for Computational Linguistics: ACL 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Quality education","id":"https://metadata.un.org/sdg/4","score":0.67}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":29,"referenced_works":["https://openalex.org/W2896457183","https://openalex.org/W2911489562","https://openalex.org/W2962784628","https://openalex.org/W2963212250","https://openalex.org/W2963979492","https://openalex.org/W3001279689","https://openalex.org/W3030931573","https://openalex.org/W3031596474","https://openalex.org/W3035032094","https://openalex.org/W3134155512","https://openalex.org/W3139401867","https://openalex.org/W3176198948","https://openalex.org/W3198443217","https://openalex.org/W3208063159","https://openalex.org/W4224308101","https://openalex.org/W4225591000","https://openalex.org/W4226146865","https://openalex.org/W4226399820","https://openalex.org/W4285077564","https://openalex.org/W4285208773","https://openalex.org/W4287067130","https://openalex.org/W4287822362","https://openalex.org/W4287854589","https://openalex.org/W4287890112","https://openalex.org/W4288090629","https://openalex.org/W4292779060","https://openalex.org/W4385245566","https://openalex.org/W4385681388","https://openalex.org/W630532510"],"related_works":["https://openalex.org/W4288365749","https://openalex.org/W4288267738","https://openalex.org/W4287826556","https://openalex.org/W4287598411","https://openalex.org/W3198458223","https://openalex.org/W3126642501","https://openalex.org/W3098382480","https://openalex.org/W3049463507","https://openalex.org/W3013624417","https://openalex.org/W2936497627"],"abstract_inverted_index":{"In":[0,31,77],"recent":[1],"years,":[2],"pre-trained":[3],"transformer-based":[4],"language":[5],"models":[6,21,86,110,120],"(LM)":[7],"have":[8,89],"become":[9],"a":[10,61],"key":[11],"resource":[12],"for":[13,41,59,87],"implementing":[14],"most":[15,29],"NLP":[16],"tasks.":[17],"However,":[18],"pre-training":[19,42],"such":[20],"demands":[22],"large":[23],"text":[24],"collections":[25],"not":[26],"available":[27],"in":[28],"languages.":[30],"this":[32],"paper,":[33],"we":[34],"study":[35],"the":[36,46,74,137],"use":[37],"of":[38],"machine-translated":[39],"corpora":[40],"LMs.":[43],"We":[44],"answer":[45],"following":[47],"research":[48,133],"questions:":[49],"RQ1:":[50],"Is":[51],"MT-based":[52],"data":[53,58,66,71,94,97,115,124],"an":[54],"alternative":[55],"to":[56,79],"real":[57,65,93,123],"learning":[60],"LM?;":[62],"RQ2:":[63],"Can":[64],"be":[67,126],"complemented":[68],"with":[69,122,128],"translated":[70,98,114],"and":[72,95],"improve":[73],"resulting":[75],"LM?":[76],"order":[78],"validate":[80],"these":[81],"two":[82],"questions,":[83],"several":[84],"BERT":[85],"Basque":[88],"been":[90],"trained,":[91],"combining":[92],"synthetic":[96,129],"from":[99],"Spanish.The":[100],"evaluation":[101],"carried":[102],"out":[103],"on":[104,113,136],"9":[105],"NLU":[106],"tasks":[107],"indicates":[108],"that":[109],"trained":[111,121],"exclusively":[112],"offer":[116],"competitive":[117],"results.":[118],"Furthermore,":[119],"can":[125],"improved":[127],"data,":[130],"although":[131],"further":[132],"is":[134],"needed":[135],"matter.":[138]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4385572030","counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2025-01-02T11:19:56.065665","created_date":"2023-08-05"}