{"id":"https://openalex.org/W3104162037","doi":"https://doi.org/10.18653/v1/2020.findings-emnlp.46","title":"Revisiting Representation Degeneration Problem in Language Modeling","display_name":"Revisiting Representation Degeneration Problem in Language Modeling","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W3104162037","doi":"https://doi.org/10.18653/v1/2020.findings-emnlp.46","mag":"3104162037"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2020.findings-emnlp.46","pdf_url":"https://www.aclweb.org/anthology/2020.findings-emnlp.46.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://www.aclweb.org/anthology/2020.findings-emnlp.46.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100350973","display_name":"Zhong Zhang","orcid":"https://orcid.org/0000-0003-1349-9755"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068892996","display_name":"Chongming Gao","orcid":"https://orcid.org/0000-0002-5187-9196"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chongming Gao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001629112","display_name":"Cong Xu","orcid":"https://orcid.org/0000-0001-5961-2210"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cong Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036927614","display_name":"Rui Miao","orcid":"https://orcid.org/0000-0002-2917-2311"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rui Miao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015410049","display_name":"Qinli Yang","orcid":"https://orcid.org/0000-0001-8966-6762"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qinli Yang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5088843448","display_name":"Junming Shao","orcid":"https://orcid.org/0000-0002-1183-2991"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Junming Shao","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.267,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.80582,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":91},"biblio":{"volume":null,"issue":null,"first_page":"518","last_page":"527"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization","score":0.83891404},{"id":"https://openalex.org/keywords/tying","display_name":"Tying","score":0.6440972},{"id":"https://openalex.org/keywords/representation","display_name":"Representation","score":0.4610137}],"concepts":[{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.83891404},{"id":"https://openalex.org/C2780938662","wikidata":"https://www.wikidata.org/wiki/Q973710","display_name":"Tying","level":2,"score":0.6440972},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6248396},{"id":"https://openalex.org/C165700671","wikidata":"https://www.wikidata.org/wiki/Q203484","display_name":"Laplace operator","level":2,"score":0.5896487},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4610137},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.45804757},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45131966},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.4404345},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.40121323},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3998501},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.26255977},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.06593171},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2020.findings-emnlp.46","pdf_url":"https://www.aclweb.org/anthology/2020.findings-emnlp.46.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2020.findings-emnlp.46","pdf_url":"https://www.aclweb.org/anthology/2020.findings-emnlp.46.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.59,"id":"https://metadata.un.org/sdg/4","display_name":"Quality education"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":26,"referenced_works":["https://openalex.org/W1514535095","https://openalex.org/W179875071","https://openalex.org/W1895577753","https://openalex.org/W1977193486","https://openalex.org/W2167999447","https://openalex.org/W2804323070","https://openalex.org/W2882319491","https://openalex.org/W2888799392","https://openalex.org/W2907252220","https://openalex.org/W2946232455","https://openalex.org/W2962832505","https://openalex.org/W2962964385","https://openalex.org/W2963341956","https://openalex.org/W2963347649","https://openalex.org/W2963403868","https://openalex.org/W2963494889","https://openalex.org/W2963537482","https://openalex.org/W2963748792","https://openalex.org/W2964073004","https://openalex.org/W2964110616","https://openalex.org/W2964308564","https://openalex.org/W2970184163","https://openalex.org/W2979746920","https://openalex.org/W2988217457","https://openalex.org/W2996657533","https://openalex.org/W3142831896"],"related_works":["https://openalex.org/W954011496","https://openalex.org/W3139928442","https://openalex.org/W3125367266","https://openalex.org/W3124892642","https://openalex.org/W2998037107","https://openalex.org/W2379437105","https://openalex.org/W2016024526","https://openalex.org/W1597439699","https://openalex.org/W1569871744","https://openalex.org/W1567336638"],"abstract_inverted_index":{"Weight":[0],"tying":[1],"is":[2,26,77,86],"now":[3],"a":[4,20,27,47,52,64],"common":[5],"setting":[6],"in":[7,30,46],"many":[8],"language":[9,14,53,129],"generation":[10],"tasks":[11],"such":[12],"as":[13,83],"modeling":[15,130],"and":[16,44,62,103],"machine":[17],"translation.":[18],"However,":[19],"recent":[21],"study":[22],"reveals":[23],"that":[24,35,73],"there":[25],"potential":[28],"flaw":[29],"weight":[31],"tying.":[32],"They":[33,55],"find":[34],"the":[36,58,74,81,84,99,106,109,125,132,135],"learned":[37],"word":[38],"embeddings":[39],"are":[40],"likely":[41,88],"to":[42,67,79,89,123],"degenerate":[43],"lie":[45],"narrow":[48],"cone":[49],"when":[50],"training":[51],"model.":[54],"call":[56],"it":[57],"representation":[59,100],"degeneration":[60,85,101],"problem":[61,102],"propose":[63,115],"cosine":[65,75],"regularization":[66,76,118,122],"solve":[68,80],"it.":[69],"Nevertheless,":[70],"we":[71,97,114],"prove":[72],"insufficient":[78],"problem,":[82],"still":[87],"happen":[90],"under":[91],"certain":[92],"conditions.":[93],"In":[94],"this":[95],"paper,":[96],"revisit":[98],"theoretically":[104],"analyze":[105],"limitations":[107],"of":[108,134],"previously":[110],"proposed":[111,136],"solution.":[112],"Afterward,":[113],"an":[116],"alternative":[117],"method":[119],"called":[120],"Laplacian":[121,137],"tackle":[124],"problem.":[126],"Experiments":[127],"on":[128],"demonstrate":[131],"effectiveness":[133],"regularization.":[138]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3104162037","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":5}],"updated_date":"2025-01-03T10:01:28.408252","created_date":"2020-11-23"}