{"id":"https://openalex.org/W4385567868","doi":"https://doi.org/10.1145/3580305.3599921","title":"TwHIN-BERT: A Socially-Enriched Pre-trained Language Model for Multilingual Tweet Representations at Twitter","display_name":"TwHIN-BERT: A Socially-Enriched Pre-trained Language Model for Multilingual Tweet Representations at Twitter","publication_year":2023,"publication_date":"2023-08-04","ids":{"openalex":"https://openalex.org/W4385567868","doi":"https://doi.org/10.1145/3580305.3599921"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3580305.3599921","pdf_url":null,"source":{"id":"https://openalex.org/S4363608767","display_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.1145/3580305.3599921","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022999126","display_name":"Xinyang Zhang","orcid":"https://orcid.org/0000-0001-6474-682X"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xinyang Zhang","raw_affiliation_strings":["The University of Illinois at Urbana-Champaign, Urbana, IL, USA"],"affiliations":[{"raw_affiliation_string":"The University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015209115","display_name":"Yury Malkov","orcid":"https://orcid.org/0000-0003-4324-6433"},"institutions":[{"id":"https://openalex.org/I113979032","display_name":"Twitter (United States)","ror":"https://ror.org/04wt43v05","country_code":"US","type":"company","lineage":["https://openalex.org/I113979032"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yury Malkov","raw_affiliation_strings":["Twitter Cortex, San Francisco, CA, USA"],"affiliations":[{"raw_affiliation_string":"Twitter Cortex, San Francisco, CA, USA","institution_ids":["https://openalex.org/I113979032"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047959451","display_name":"Omar U. Florez","orcid":"https://orcid.org/0009-0008-7884-8825"},"institutions":[{"id":"https://openalex.org/I113979032","display_name":"Twitter (United States)","ror":"https://ror.org/04wt43v05","country_code":"US","type":"company","lineage":["https://openalex.org/I113979032"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Omar Florez","raw_affiliation_strings":["Twitter Cortex, San Francisco, CA, USA"],"affiliations":[{"raw_affiliation_string":"Twitter Cortex, San Francisco, CA, USA","institution_ids":["https://openalex.org/I113979032"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007855897","display_name":"Serim Park","orcid":"https://orcid.org/0009-0004-0131-245X"},"institutions":[{"id":"https://openalex.org/I113979032","display_name":"Twitter (United States)","ror":"https://ror.org/04wt43v05","country_code":"US","type":"company","lineage":["https://openalex.org/I113979032"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Serim Park","raw_affiliation_strings":["Twitter Cortex, San Francisco, CA, USA"],"affiliations":[{"raw_affiliation_string":"Twitter Cortex, San Francisco, CA, USA","institution_ids":["https://openalex.org/I113979032"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041664883","display_name":"Brian McWilliams","orcid":"https://orcid.org/0009-0002-7433-1702"},"institutions":[{"id":"https://openalex.org/I113979032","display_name":"Twitter (United States)","ror":"https://ror.org/04wt43v05","country_code":"US","type":"company","lineage":["https://openalex.org/I113979032"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brian McWilliams","raw_affiliation_strings":["Twitter Cortex, San Francisco, CA, USA"],"affiliations":[{"raw_affiliation_string":"Twitter Cortex, San Francisco, CA, USA","institution_ids":["https://openalex.org/I113979032"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019539533","display_name":"Jiawei Han","orcid":"https://orcid.org/0000-0002-3629-2696"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiawei Han","raw_affiliation_strings":["The University of Illinois at Urbana-Champaign, Urbana, IL, USA"],"affiliations":[{"raw_affiliation_string":"The University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035119055","display_name":"Ahmed El-Kishky","orcid":"https://orcid.org/0000-0003-0121-7781"},"institutions":[{"id":"https://openalex.org/I113979032","display_name":"Twitter (United States)","ror":"https://ror.org/04wt43v05","country_code":"US","type":"company","lineage":["https://openalex.org/I113979032"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ahmed El-Kishky","raw_affiliation_strings":["Twitter Cortex, San Francisco, CA, USA"],"affiliations":[{"raw_affiliation_string":"Twitter Cortex, San Francisco, CA, USA","institution_ids":["https://openalex.org/I113979032"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":8.332,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":21,"citation_normalized_percentile":{"value":0.999843,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"5597","last_page":"5607"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9972,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9944,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.58632135},{"id":"https://openalex.org/keywords/social-network","display_name":"Social network (sociolinguistics)","score":0.5462158},{"id":"https://openalex.org/keywords/representation","display_name":"Representation","score":0.45640787}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8371737},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.67087555},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.638161},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.63218015},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.59580284},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.58632135},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5795861},{"id":"https://openalex.org/C4727928","wikidata":"https://www.wikidata.org/wiki/Q17164759","display_name":"Social network (sociolinguistics)","level":3,"score":0.5462158},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.45640787},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3882569},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.34441847},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3580305.3599921","pdf_url":null,"source":{"id":"https://openalex.org/S4363608767","display_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2209.07562","pdf_url":"https://arxiv.org/pdf/2209.07562","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3580305.3599921","pdf_url":null,"source":{"id":"https://openalex.org/S4363608767","display_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality education","score":0.68}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":30,"referenced_works":["https://openalex.org/W1888005072","https://openalex.org/W2062797058","https://openalex.org/W2075010670","https://openalex.org/W2124509324","https://openalex.org/W2145658888","https://openalex.org/W2493916176","https://openalex.org/W2577283662","https://openalex.org/W2584620251","https://openalex.org/W2743104969","https://openalex.org/W2747329762","https://openalex.org/W2759136286","https://openalex.org/W2806198715","https://openalex.org/W2946328221","https://openalex.org/W2962739339","https://openalex.org/W2962756421","https://openalex.org/W2998702515","https://openalex.org/W3035390927","https://openalex.org/W3100848837","https://openalex.org/W3104097132","https://openalex.org/W3104186312","https://openalex.org/W3104249938","https://openalex.org/W3104987177","https://openalex.org/W3115081393","https://openalex.org/W3169483174","https://openalex.org/W4221153690","https://openalex.org/W4246649926","https://openalex.org/W4285190530","https://openalex.org/W4288089799","https://openalex.org/W4290875442","https://openalex.org/W4290943549"],"related_works":["https://openalex.org/W4321353415","https://openalex.org/W4246352526","https://openalex.org/W2966858528","https://openalex.org/W2745001401","https://openalex.org/W2378211422","https://openalex.org/W2151687600","https://openalex.org/W2130974462","https://openalex.org/W2086519370","https://openalex.org/W2028665553","https://openalex.org/W1482441085"],"abstract_inverted_index":{"Pre-trained":[0],"language":[1,8,47,66,140],"models":[2,67],"(PLMs)":[3],"are":[4,14],"fundamental":[5],"for":[6],"natural":[7],"processing":[9],"applications.":[10],"Most":[11],"existing":[12],"PLMs":[13],"not":[15,29,73],"tailored":[16],"to":[17,113,155],"the":[18,26,32,57,85,156],"noisy":[19],"user-generated":[20,117],"text":[21],"on":[22,53,84,100,123],"social":[23,34,40,59,81,87,126,151],"media,":[24],"and":[25,128,132,145,150],"pre-training":[27],"does":[28],"factor":[30],"in":[31,38],"valuable":[33,111],"engagement":[35,152],"logs":[36],"available":[37],"a":[39,45,80,90,110],"network.":[41,60],"We":[42,119,142],"present":[43],"TwHIN-BERT,":[44],"multilingual":[46,125],"model":[48,97,114,122],"productionized":[49],"at":[50],"Twitter,":[51],"trained":[52,71,99],"in-domain":[54],"data":[55],"from":[56,63],"popular":[58],"TwHIN-BERT":[61,144],"differs":[62],"prior":[64],"pre-trained":[65,139],"as":[68],"it":[69],"is":[70,98],"with":[72,79],"only":[74],"text-based":[75],"self-supervision":[76],"but":[77],"also":[78],"objective":[82],"based":[83],"rich":[86],"engagements":[88],"within":[89],"Twitter":[91],"heterogeneous":[92],"information":[93],"network":[94],"(TwHIN).":[95],"Our":[96],"7":[101],"billion":[102],"tweets":[103],"covering":[104],"over":[105,137],"100":[106],"distinct":[107],"languages,":[108],"providing":[109],"representation":[112],"short,":[115],"noisy,":[116],"text.":[118],"evaluate":[120],"our":[121,146],"various":[124],"recommendation":[127],"semantic":[129],"understanding":[130],"tasks":[131],"demonstrate":[133],"significant":[134],"metric":[135],"improvement":[136],"established":[138],"models.":[141],"open-source":[143],"curated":[147],"hashtag":[148],"prediction":[149],"benchmark":[153],"datasets":[154],"research":[157],"community.":[158]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4385567868","counts_by_year":[{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":5}],"updated_date":"2025-01-19T15:28:09.192764","created_date":"2023-08-05"}