{"id":"https://openalex.org/W4304697829","doi":"https://doi.org/10.48550/arxiv.2205.05131","title":"UL2: Unifying Language Learning Paradigms","display_name":"UL2: Unifying Language Learning Paradigms","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4304697829","doi":"https://doi.org/10.48550/arxiv.2205.05131"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2205.05131","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2205.05131","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103069680","display_name":"Yi Tay","orcid":"https://orcid.org/0000-0001-6896-4496"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tay, Yi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102906366","display_name":"Mostafa Dehghani","orcid":"https://orcid.org/0000-0002-9772-1095"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dehghani, Mostafa","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102435043","display_name":"Vinh Q. Tran","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tran, Vinh Q.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082383881","display_name":"Xavier Garc\u00eda","orcid":"https://orcid.org/0000-0002-8500-4224"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Garcia, Xavier","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100657725","display_name":"Jason Lee","orcid":"https://orcid.org/0000-0003-4042-795X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Jason","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024842018","display_name":"Xuezhi Wang","orcid":"https://orcid.org/0000-0001-7592-2358"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xuezhi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051828575","display_name":"Hyung Won Chung","orcid":"https://orcid.org/0000-0002-1280-9953"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chung, Hyung Won","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036477705","display_name":"Dara Bahri","orcid":"https://orcid.org/0000-0003-0144-2911"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bahri, Dara","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058037733","display_name":"Tal Schuster","orcid":"https://orcid.org/0000-0002-7772-8230"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schuster, Tal","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003807260","display_name":"Huaixiu Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Huaixiu Steven","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061512999","display_name":"Denny Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Denny","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068878643","display_name":"Neil Houlsby","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Houlsby, Neil","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5000115067","display_name":"Donald Metzler","orcid":"https://orcid.org/0000-0003-4276-6269"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Metzler, Donald","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":76,"citation_normalized_percentile":{"value":0.953382,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9649,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.80964077},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6146242},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.6096692},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6048845},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.56875175},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5395777},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.48584363},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.47336978},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.46126443},{"id":"https://openalex.org/C137635306","wikidata":"https://www.wikidata.org/wiki/Q182667","display_name":"Pareto principle","level":2,"score":0.45600593},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.41323203},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2205.05131","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2205.05131","pdf_url":"http://arxiv.org/pdf/2205.05131","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2205.05131","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2205.05131","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4389760904","https://openalex.org/W4323520239","https://openalex.org/W4306886878","https://openalex.org/W4242223894","https://openalex.org/W3148229873","https://openalex.org/W2366403280","https://openalex.org/W2150160875","https://openalex.org/W2091301346","https://openalex.org/W1517524280","https://openalex.org/W1495108544"],"abstract_inverted_index":{"Existing":[0],"pre-trained":[1],"models":[2,39,151],"are":[3,41,61],"generally":[4],"geared":[5],"towards":[6],"a":[7,34,67,102,114,230],"particular":[8],"class":[9],"of":[10,116,195,235],"problems.":[11],"To":[12],"date,":[13],"there":[14],"seems":[15],"to":[16,133,161,232,244,255],"be":[17,83,95],"still":[18],"no":[19],"consensus":[20],"on":[21,168,188,197],"what":[22],"the":[23,144,193,245,264],"right":[24],"architecture":[25],"and":[26,46,76,88,138,191,207,218,251],"pre-training":[27,38,55,80,103,108,126,136],"setup":[28],"should":[29],"be.":[30],"This":[31],"paper":[32],"presents":[33],"unified":[35,70],"framework":[36],"for":[37,72,225,263],"that":[40,60,105,140],"universally":[42],"effective":[43],"across":[44,152],"datasets":[45],"setups.":[47,155],"We":[48,97,111,128,258],"begin":[49],"by":[50,146],"disentangling":[51],"architectural":[52],"archetypes":[53],"with":[54,124,215],"objectives":[56,81,93,137],"--":[57],"two":[58],"concepts":[59],"commonly":[62],"conflated.":[63],"Next,":[64],"we":[65,164,239],"present":[66],"generalized":[68],"&":[69,149,267],"perspective":[71],"self-supervision":[73],"in":[74],"NLP":[75,174],"show":[77],"how":[78,89],"different":[79,92],"can":[82,94],"cast":[84],"as":[85],"one":[86],"another":[87],"interpolating":[90],"between":[91],"effective.":[96],"then":[98],"propose":[99],"Mixture-of-Denoisers":[100],"(MoD),":[101],"objective":[104],"combines":[106],"diverse":[107,154],"paradigms":[109],"together.":[110],"furthermore":[112],"introduce":[113],"notion":[115],"mode":[117],"switching,":[118],"wherein":[119],"downstream":[120],"fine-tuning":[121],"is":[122],"associated":[123],"specific":[125],"schemes.":[127],"conduct":[129],"extensive":[130],"ablative":[131],"experiments":[132],"compare":[134],"multiple":[135,153],"find":[139],"our":[141,158],"method":[142],"pushes":[143],"Pareto-frontier":[145],"outperforming":[147,185],"T5":[148,208],"GPT-like":[150],"By":[156],"scaling":[157],"model":[159,177],"up":[160],"20B":[162,204,211,236,247,266],"parameters,":[163],"achieve":[165,179],"SOTA":[166],"performance":[167,194],"50":[169],"well-established":[170],"supervised":[171],"finetuning":[172],"based":[173],"tasks.":[175],"Our":[176],"also":[178,212],"strong":[180],"results":[181],"at":[182,229],"in-context":[183],"learning,":[184],"175B":[186],"GPT-3":[187],"zero-shot":[189],"SuperGLUE":[190],"tripling":[192],"T5-XXL":[196],"one-shot":[198],"summarization.":[199],"On":[200],"0-shot":[201],"MMLU,":[202],"UL2":[203,210,246,265],"outperforms":[205],"T0":[206],"models.":[209],"works":[213],"well":[214],"chain-of-thought":[216],"prompting":[217],"reasoning,":[219],"making":[220],"it":[221],"an":[222],"appealing":[223],"choice":[224],"research":[226],"into":[227],"reasoning":[228],"small":[231],"medium":[233],"scale":[234],"parameters.":[237],"Finally,":[238],"apply":[240],"FLAN":[241],"instruction":[242],"tuning":[243],"model,":[248],"achieving":[249],"MMLU":[250],"Big-Bench":[252],"scores":[253],"competitive":[254],"FLAN-PaLM":[256],"62B.":[257],"release":[259],"Flax-based":[260],"T5X":[261],"checkpoints":[262],"Flan-UL2":[268],"20B.":[269]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4304697829","counts_by_year":[{"year":2024,"cited_by_count":28},{"year":2023,"cited_by_count":46},{"year":2022,"cited_by_count":1}],"updated_date":"2025-01-22T09:57:45.724458","created_date":"2022-10-12"}