{"id":"https://openalex.org/W4283692009","doi":"https://doi.org/10.48550/arxiv.2206.12840","title":"Your Autoregressive Generative Model Can be Better If You Treat It as an Energy-Based One","display_name":"Your Autoregressive Generative Model Can be Better If You Treat It as an Energy-Based One","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4283692009","doi":"https://doi.org/10.48550/arxiv.2206.12840"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2206.12840","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2206.12840","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027368431","display_name":"Yezhen Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yezhen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009243342","display_name":"Tong Che","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Che, Tong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100374360","display_name":"Bo Li","orcid":"https://orcid.org/0000-0001-6709-0942"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Bo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028035527","display_name":"Kaitao Song","orcid":"https://orcid.org/0000-0002-4046-8594"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Kaitao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036100753","display_name":"Hengzhi Pei","orcid":"https://orcid.org/0000-0001-7036-2996"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pei, Hengzhi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086198262","display_name":"Yoshua Bengio","orcid":"https://orcid.org/0000-0002-9322-3515"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bengio, Yoshua","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100440903","display_name":"Dongsheng Li","orcid":"https://orcid.org/0000-0001-9743-2034"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Dongsheng","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.609515,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":59,"max":69},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9678,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9678,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9667,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9193,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.5293518}],"concepts":[{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.9116092},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.72522295},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5508535},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5505974},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.5293518},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4690887},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.4662477},{"id":"https://openalex.org/C194657046","wikidata":"https://www.wikidata.org/wiki/Q7394685","display_name":"STAR model","level":4,"score":0.43329263},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4246981},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.30823094},{"id":"https://openalex.org/C151406439","wikidata":"https://www.wikidata.org/wiki/Q186588","display_name":"Time series","level":2,"score":0.19192082},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14206594},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.124735594},{"id":"https://openalex.org/C24338571","wikidata":"https://www.wikidata.org/wiki/Q2566298","display_name":"Autoregressive integrated moving average","level":3,"score":0.1082809},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08484739},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2206.12840","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2206.12840","pdf_url":"http://arxiv.org/pdf/2206.12840","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2206.12840","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2206.12840","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.61}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3120578569","https://openalex.org/W2439807930","https://openalex.org/W2168175994","https://openalex.org/W2120434453","https://openalex.org/W2024529895","https://openalex.org/W2019155478","https://openalex.org/W2009692134","https://openalex.org/W1972271943","https://openalex.org/W1902630399","https://openalex.org/W1487412319"],"abstract_inverted_index":{"Autoregressive":[0],"generative":[1,62,132],"models":[2,63],"are":[3,85],"commonly":[4],"used,":[5],"especially":[6],"for":[7,59,97,130],"those":[8],"tasks":[9],"involving":[10],"sequential":[11],"data.":[12],"They":[13],"have,":[14],"however,":[15],"been":[16],"plagued":[17],"by":[18],"a":[19,54,68],"slew":[20],"of":[21,29,38,67,78,80,101,120,151],"inherent":[22],"flaws":[23],"due":[24],"to":[25,45,87],"the":[26,75,81,89,99,122,149,152],"intrinsic":[27],"characteristics":[28],"chain-style":[30],"conditional":[31],"modeling":[32],"(e.g.,":[33],"exposure":[34,123],"bias":[35,124],"or":[36],"lack":[37],"long-range":[39],"coherence),":[40],"severely":[41],"limiting":[42],"their":[43],"ability":[44],"model":[46,91,96],"distributions":[47],"properly.":[48],"In":[49],"this":[50],"paper,":[51],"we":[52,84,109],"propose":[53],"unique":[55],"method":[56],"termed":[57],"E-ARM":[58,112],"training":[60],"autoregressive":[61,90,131],"that":[64,111],"takes":[65],"advantage":[66],"well-designed":[69],"energy-based":[70,95],"learning":[71],"objective.":[72],"By":[73],"leveraging":[74],"extra":[76,106],"degree":[77],"freedom":[79],"softmax":[82],"operation,":[83],"allowed":[86],"make":[88],"itself":[92],"be":[93,114],"an":[94],"measuring":[98],"likelihood":[100],"input":[102],"without":[103],"introducing":[104],"any":[105],"parameters.":[107],"Furthermore,":[108],"show":[110],"can":[113],"trained":[115],"efficiently":[116],"and":[117,126,145],"is":[118],"capable":[119],"alleviating":[121],"problem":[125],"increase":[127],"temporal":[128],"coherence":[129],"models.":[133],"Extensive":[134],"empirical":[135],"results,":[136],"covering":[137],"benchmarks":[138],"like":[139],"language":[140],"modeling,":[141],"neural":[142],"machine":[143],"translation,":[144],"image":[146],"generation,":[147],"demonstrate":[148],"effectiveness":[150],"proposed":[153],"approach.":[154]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4283692009","counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2025-03-21T06:05:01.891415","created_date":"2022-06-30"}