{"id":"https://openalex.org/W4293791808","doi":"https://doi.org/10.48550/arxiv.2208.12367","title":"A Compact Pretraining Approach for Neural Language Models","display_name":"A Compact Pretraining Approach for Neural Language Models","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4293791808","doi":"https://doi.org/10.48550/arxiv.2208.12367"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2208.12367","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2208.12367","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040083359","display_name":"Shahriar Golchin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Golchin, Shahriar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047699502","display_name":"Mihai Surdeanu","orcid":"https://orcid.org/0000-0001-6956-8030"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Surdeanu, Mihai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000702925","display_name":"Nazgol Tavabi","orcid":"https://orcid.org/0000-0002-8877-622X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tavabi, Nazgol","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5041145380","display_name":"Ata M. Kiapour","orcid":"https://orcid.org/0000-0001-7742-5769"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kiapour, Ata","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":61},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Natural Language Processing","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Natural Language Processing","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Statistical Machine Translation and Natural Language Processing","score":0.9949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Automatic Keyword Extraction from Textual Data","score":0.9561,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5648452},{"id":"https://openalex.org/keywords/domain-adaptation","display_name":"Domain adaptation","score":0.55454063},{"id":"https://openalex.org/keywords/language-modeling","display_name":"Language Modeling","score":0.549977},{"id":"https://openalex.org/keywords/neural-machine-translation","display_name":"Neural Machine Translation","score":0.548513},{"id":"https://openalex.org/keywords/topic-modeling","display_name":"Topic Modeling","score":0.535397},{"id":"https://openalex.org/keywords/pretrained-models","display_name":"Pretrained Models","score":0.520596},{"id":"https://openalex.org/keywords/multilingual-neural-machine-translation","display_name":"Multilingual Neural Machine Translation","score":0.518184}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.802672},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.59794474},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.57026285},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5648452},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5568804},{"id":"https://openalex.org/C2776434776","wikidata":"https://www.wikidata.org/wiki/Q19246213","display_name":"Domain adaptation","level":3,"score":0.55454063},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.5377526},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5073952},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.49780965},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4854365},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.47030395},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3765667},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32896948},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.09389123},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.065253854},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2208.12367","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2208.12367","pdf_url":"http://arxiv.org/pdf/2208.12367","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2208.12367","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2208.12367","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4394775207","https://openalex.org/W4388145910","https://openalex.org/W4312490297","https://openalex.org/W4248336175","https://openalex.org/W3009369890","https://openalex.org/W2391445434","https://openalex.org/W2381570729","https://openalex.org/W2366107444","https://openalex.org/W2031260042","https://openalex.org/W1976205134"],"abstract_inverted_index":{"Domain":[0],"adaptation":[1],"for":[2,164],"large":[3],"neural":[4],"language":[5],"models":[6],"(NLMs)":[7],"is":[8,169],"coupled":[9],"with":[10,105],"massive":[11],"amounts":[12],"of":[13,40,64,119,166],"unstructured":[14,59,91],"data":[15,42,60],"in":[16,49],"the":[17,41,46,50,58,89,113,135],"pretraining":[18,151],"phase.":[19],"In":[20,70],"this":[21],"study,":[22],"however,":[23],"we":[24,72,145],"show":[25,146],"that":[26,43,112,147],"pretrained":[27,121],"NLMs":[28,120],"learn":[29],"in-domain":[30],"information":[31,48],"more":[32],"effectively":[33],"and":[34,67,80],"faster":[35],"from":[36,57,85],"a":[37,62],"compact":[38,55],"subset":[39],"focuses":[44],"on":[45,74,117,128,134],"key":[47],"domain.":[51],"We":[52,94],"construct":[53],"these":[54,86],"subsets":[56],"using":[61,98,122],"combination":[63],"abstractive":[65,78],"summaries":[66,87],"extractive":[68],"keywords.":[69],"particular,":[71],"rely":[73],"BART":[75],"to":[76,82,155,159],"generate":[77],"summaries,":[79],"KeyBERT":[81],"extract":[83],"keywords":[84],"(or":[88],"original":[90],"text":[92],"directly).":[93],"evaluate":[95],"our":[96,123,148,167],"approach":[97],"six":[99],"different":[100],"settings:":[101],"three":[102],"datasets":[103],"combined":[104],"two":[106],"distinct":[107],"NLMs.":[108],"Our":[109],"results":[110],"reveal":[111],"task-specific":[114],"classifiers":[115],"trained":[116],"top":[118],"method":[124],"outperform":[125],"methods":[126,141],"based":[127],"traditional":[129],"pretraining,":[130],"i.e.,":[131],"random":[132],"masking":[133],"entire":[136],"data,":[137],"as":[138,140],"well":[139],"without":[142],"pretraining.":[143,161],"Further,":[144],"strategy":[149],"reduces":[150],"time":[152],"by":[153],"up":[154],"five":[156],"times":[157],"compared":[158],"vanilla":[160],"The":[162],"code":[163],"all":[165],"experiments":[168],"publicly":[170],"available":[171],"at":[172],"https://github.com/shahriargolchin/compact-pretraining.":[173]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4293791808","counts_by_year":[],"updated_date":"2024-10-23T03:13:51.333461","created_date":"2022-08-31"}