{"id":"https://openalex.org/W4384659531","doi":"https://doi.org/10.1145/3539618.3592006","title":"LAPCA: Language-Agnostic Pretraining with Cross-Lingual Alignment","display_name":"LAPCA: Language-Agnostic Pretraining with Cross-Lingual Alignment","publication_year":2023,"publication_date":"2023-07-18","ids":{"openalex":"https://openalex.org/W4384659531","doi":"https://doi.org/10.1145/3539618.3592006"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3539618.3592006","pdf_url":null,"source":{"id":"https://openalex.org/S4363608773","display_name":"Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":"https://doi.org/10.1145/3539618.3592006","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063189132","display_name":"Dmitry Abulkhanov","orcid":"https://orcid.org/0000-0002-8758-2458"},"institutions":[{"id":"https://openalex.org/I4210159102","display_name":"Huawei Technologies (Sweden)","ror":"https://ror.org/0500fyd17","country_code":"SE","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210159102"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Dmitry Abulkhanov","raw_affiliation_strings":["Huawei Noah's Ark Lab, Moscow, Russian Fed."],"affiliations":[{"raw_affiliation_string":"Huawei Noah's Ark Lab, Moscow, Russian Fed.","institution_ids":["https://openalex.org/I4210159102"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027558716","display_name":"Nikita Sorokin","orcid":"https://orcid.org/0009-0002-2437-953X"},"institutions":[{"id":"https://openalex.org/I4210159102","display_name":"Huawei Technologies (Sweden)","ror":"https://ror.org/0500fyd17","country_code":"SE","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210159102"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Nikita Sorokin","raw_affiliation_strings":["Huawei Noah's Ark Lab, Moscow, Russian Fed."],"affiliations":[{"raw_affiliation_string":"Huawei Noah's Ark Lab, Moscow, Russian Fed.","institution_ids":["https://openalex.org/I4210159102"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045523675","display_name":"Sergey Nikolenko","orcid":"https://orcid.org/0000-0001-7787-2251"},"institutions":[{"id":"https://openalex.org/I2801160905","display_name":"Steklov Mathematical Institute","ror":"https://ror.org/03zeg8w71","country_code":"RU","type":"facility","lineage":["https://openalex.org/I1313323035","https://openalex.org/I2801160905","https://openalex.org/I4210124601"]},{"id":"https://openalex.org/I4210101868","display_name":"Institute for System Programming","ror":"https://ror.org/017ef8252","country_code":"RU","type":"facility","lineage":["https://openalex.org/I1313323035","https://openalex.org/I4210101868","https://openalex.org/I4210124601"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Sergey Nikolenko","raw_affiliation_strings":["Ivannikov Institute for System Programming of the RAS & Steklov Institute of Mathematics of the RAS, Moscow, Russian Fed."],"affiliations":[{"raw_affiliation_string":"Ivannikov Institute for System Programming of the RAS & Steklov Institute of Mathematics of the RAS, Moscow, Russian Fed.","institution_ids":["https://openalex.org/I2801160905","https://openalex.org/I4210101868"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006755109","display_name":"Valentin Malykh","orcid":"https://orcid.org/0000-0002-4508-2527"},"institutions":[{"id":"https://openalex.org/I4210159102","display_name":"Huawei Technologies (Sweden)","ror":"https://ror.org/0500fyd17","country_code":"SE","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210159102"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Valentin Malykh","raw_affiliation_strings":["Huawei Noah's Ark Lab, Moscow, Russian Fed."],"affiliations":[{"raw_affiliation_string":"Huawei Noah's Ark Lab, Moscow, Russian Fed.","institution_ids":["https://openalex.org/I4210159102"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.824,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":2,"citation_normalized_percentile":{"value":0.558059,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":78,"max":84},"biblio":{"volume":null,"issue":null,"first_page":"2098","last_page":"2102"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9758,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8250148},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.70561093},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.66871697},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.64061695},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.6388593},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.57369214},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5478026},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.4563414},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.41158867},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3539618.3592006","pdf_url":null,"source":{"id":"https://openalex.org/S4363608773","display_name":"Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3539618.3592006","pdf_url":null,"source":{"id":"https://openalex.org/S4363608773","display_name":"Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Quality education","id":"https://metadata.un.org/sdg/4","score":0.78}],"grants":[{"funder":"https://openalex.org/F4320319755","funder_display_name":"Analytical Center for the Government of the Russian Federation","award_id":"000000D730321P5Q0002"}],"datasets":[],"versions":[],"referenced_works_count":23,"referenced_works":["https://openalex.org/W1593271688","https://openalex.org/W2740132093","https://openalex.org/W2896457183","https://openalex.org/W2912924812","https://openalex.org/W2963339397","https://openalex.org/W2963748441","https://openalex.org/W3008140668","https://openalex.org/W3013840636","https://openalex.org/W3034469191","https://openalex.org/W3035390927","https://openalex.org/W3045462440","https://openalex.org/W3093871477","https://openalex.org/W3099700870","https://openalex.org/W3167303983","https://openalex.org/W3169937871","https://openalex.org/W3194157311","https://openalex.org/W3207095490","https://openalex.org/W4233907442","https://openalex.org/W4287210868","https://openalex.org/W4287640218","https://openalex.org/W4287887143","https://openalex.org/W4287896299","https://openalex.org/W4289647023"],"related_works":["https://openalex.org/W4388937922","https://openalex.org/W4382618745","https://openalex.org/W4288267738","https://openalex.org/W3113264705","https://openalex.org/W2964413124","https://openalex.org/W2885125400","https://openalex.org/W2595172197","https://openalex.org/W2127970246","https://openalex.org/W2084856301","https://openalex.org/W1989889224"],"abstract_inverted_index":{"Data":[0],"collection":[1],"and":[2,19,45,56,62,66],"mining":[3],"is":[4],"a":[5,24],"crucial":[6],"bottleneck":[7],"for":[8,53],"cross-lingual":[9,28,35,50,70],"information":[10],"retrieval":[11,58],"(CLIR).":[12],"While":[13],"previous":[14],"works":[15],"used":[16],"machine":[17],"translation":[18],"iterative":[20],"training,":[21],"we":[22],"present":[23],"novel":[25],"approach":[26],"to":[27],"pretraining":[29,33],"called":[30],"LAPCA":[31],"(language-agnostic":[32],"with":[34,75],"alignment).":[36],"We":[37],"train":[38],"the":[39,68],"LAPCA-LM":[40],"model":[41],"based":[42],"on":[43,73,82],"XLM-RoBERTa":[44],"\u0142exa":[46],"that":[47],"significantly":[48],"improves":[49],"knowledge":[51],"transfer":[52],"question":[54],"answering":[55],"sentence":[57],"on,":[59],"e.g.,":[60],"XOR-TyDi":[61],"Mr.":[63],"TyDi":[64],"datasets,":[65],"in":[67],"zero-shot":[69],"scenario":[71],"performs":[72],"par":[74],"supervised":[76],"methods,":[77],"outperforming":[78],"many":[79],"of":[80],"them":[81],"MKQA.":[83]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4384659531","counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2025-01-07T01:37:40.454049","created_date":"2023-07-20"}