{"id":"https://openalex.org/W4224330747","doi":"https://doi.org/10.48550/arxiv.2203.16187","title":"Auto-MLM: Improved Contrastive Learning for Self-supervised Multi-lingual Knowledge Retrieval","display_name":"Auto-MLM: Improved Contrastive Learning for Self-supervised Multi-lingual Knowledge Retrieval","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4224330747","doi":"https://doi.org/10.48550/arxiv.2203.16187"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2203.16187","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2203.16187","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100299294","display_name":"Wenshen Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Wenshen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037218373","display_name":"Mieradilijiang Maimaiti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maimaiti, Mieradilijiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016587219","display_name":"Yuanhang Zheng","orcid":"https://orcid.org/0000-0002-9357-645X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Yuanhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104095845","display_name":"Xin Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100329252","display_name":"Ji Zhang","orcid":"https://orcid.org/0000-0001-9544-9801"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Ji","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":59},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9966,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9966,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.9882,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9693,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8383293},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6894833},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.68200684},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.64509},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5741089},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.48509157},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2203.16187","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2203.16187","pdf_url":"http://arxiv.org/pdf/2203.16187","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2203.16187","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2203.16187","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.86,"id":"https://metadata.un.org/sdg/4","display_name":"Quality education"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4388335561","https://openalex.org/W4385572700","https://openalex.org/W4385009901","https://openalex.org/W4307309205","https://openalex.org/W4288261899","https://openalex.org/W3204019825","https://openalex.org/W2970530566","https://openalex.org/W2967478618","https://openalex.org/W2387743295","https://openalex.org/W2384605597"],"abstract_inverted_index":{"Contrastive":[0],"learning":[1],"(CL)":[2],"has":[3],"become":[4],"a":[5,86,122],"ubiquitous":[6],"approach":[7,137],"for":[8,17,95,126],"several":[9],"natural":[10],"language":[11,51],"processing":[12],"(NLP)":[13],"downstream":[14],"tasks,":[15],"especially":[16],"question":[18],"answering":[19],"(QA).":[20],"However,":[21],"the":[22,29,41,57,66,70,74,81,103,113,128,141],"major":[23],"challenge,":[24],"how":[25],"to":[26],"efficiently":[27],"train":[28],"knowledge":[30,98],"retrieval":[31],"model":[32,52],"in":[33,156],"an":[34],"unsupervised":[35],"manner,":[36],"is":[37],"still":[38],"unresolved.":[39],"Recently":[40],"commonly":[42],"used":[43],"methods":[44,144],"are":[45],"composed":[46],"of":[47,65],"CL":[48,61,75,92],"and":[49,60,93,152],"masked":[50,129],"(MLM).":[53],"Unexpectedly,":[54],"MLM":[55],"ignores":[56],"sentence-level":[58],"training,":[59],"also":[62],"neglects":[63],"extraction":[64],"internal":[67,78],"info":[68],"from":[69,80],"query.":[71],"To":[72],"optimize":[73],"hardly":[76],"obtain":[77],"information":[79],"original":[82,114],"query,":[83],"we":[84,101,120],"introduce":[85],"joint":[87],"training":[88],"method":[89],"by":[90],"combining":[91],"Auto-MLM":[94],"self-supervised":[96],"multi-lingual":[97],"retrieval.":[99],"First,":[100],"acquire":[102],"fixed":[104],"dimensional":[105],"sentence":[106],"vector.":[107],"Then,":[108],"mask":[109],"some":[110],"words":[111],"among":[112],"sentences":[115],"with":[116],"random":[117],"strategy.":[118],"Finally,":[119],"generate":[121],"new":[123],"token":[124],"representation":[125],"predicting":[127],"tokens.":[130],"Experimental":[131],"results":[132],"show":[133],"that":[134],"our":[135],"proposed":[136],"consistently":[138],"outperforms":[139],"all":[140],"previous":[142],"SOTA":[143],"on":[145],"both":[146],"AliExpress":[147],"$\\&$":[148],"LAZADA":[149],"service":[150],"corpus":[151],"openly":[153],"available":[154],"corpora":[155],"8":[157],"languages.":[158]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4224330747","counts_by_year":[],"updated_date":"2025-03-05T02:56:41.674225","created_date":"2022-04-26"}