{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:20:41Z","timestamp":1740122441572,"version":"3.37.3"},"reference-count":84,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2021,2,19]],"date-time":"2021-02-19T00:00:00Z","timestamp":1613692800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,2,19]],"date-time":"2021-02-19T00:00:00Z","timestamp":1613692800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100010661","name":"Horizon 2020 Framework Programme","doi-asserted-by":"publisher","award":["871042","951911"],"id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010661","name":"Horizon 2020 Framework Programme","doi-asserted-by":"publisher","award":["823914"],"id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Data Min Knowl Disc"],"published-print":{"date-parts":[[2021,5]]},"DOI":"10.1007\/s10618-020-00735-3","type":"journal-article","created":{"date-parts":[[2021,2,19]],"date-time":"2021-02-19T21:09:28Z","timestamp":1613768968000},"page":"911-963","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":32,"title":["Word-class embeddings for multiclass text classification"],"prefix":"10.1007","volume":"35","author":[{"given":"Alejandro","family":"Moreo","sequence":"first","affiliation":[]},{"given":"Andrea","family":"Esuli","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4221-6427","authenticated-orcid":false,"given":"Fabrizio","family":"Sebastiani","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,2,19]]},"reference":[{"key":"735_CR1","first-page":"1817","volume":"6","author":"RK Ando","year":"2005","unstructured":"Ando RK, Zhang T (2005) A framework for learning predictive structures from multiple tasks and unlabeled data. J Mach Learn Res 6:1817\u20131853","journal-title":"J Mach Learn Res"},{"key":"735_CR2","doi-asserted-by":"publisher","unstructured":"Baker D, McCallum AK (1998) Distributional clustering of words for text classification. In: Proceedings of the 21st ACM international conference on research and development in information retrieval (SIGIR 1998), Melbourne, AU, pp 96\u2013103. https:\/\/doi.org\/10.1145\/290941.290970","DOI":"10.1145\/290941.290970"},{"key":"735_CR3","unstructured":"Baldi P (2011) Autoencoders, unsupervised learning, and deep architectures. In: Proceedings of the ICML 2011 workshop on unsupervised and transfer learning, Bellevue, US, pp 37\u201349"},{"key":"735_CR4","doi-asserted-by":"publisher","unstructured":"Baroni M, Dinu G, Kruszewski G (2014) Don\u2019t count, predict! A systematic comparison of context-counting vs. context-predicting semantic vectors. In: Proceedings of the 52nd annual meeting of the association for computational linguistics (ACL 2014), Baltimore, US, pp 238\u2013247. https:\/\/doi.org\/10.3115\/v1\/p14-1023","DOI":"10.3115\/v1\/p14-1023"},{"key":"735_CR5","first-page":"1183","volume":"3","author":"R Bekkerman","year":"2003","unstructured":"Bekkerman R, El-Yaniv R, Tishby N, Winter Y (2003) Distributional word clusters vs. words for text categorization. J Mach Learn Res 3:1183\u20131208","journal-title":"J Mach Learn Res"},{"key":"735_CR6","first-page":"1137","volume":"3","author":"Y Bengio","year":"2003","unstructured":"Bengio Y, Ducharme R, Vincent P, Jauvin C (2003) A neural probabilistic language model. J Mach Learn Res 3:1137\u20131155","journal-title":"J Mach Learn Res"},{"key":"735_CR7","doi-asserted-by":"publisher","unstructured":"Berardi G, Esuli A, Macdonald C, Ounis I, Sebastiani F (2015) Semi-automated text classification for sensitivity identification. In: Proceedings of the 24th ACM international conference on information and knowledge management (CIKM 2015), Melbourne, AU, pp 1711\u20131714. https:\/\/doi.org\/10.1145\/2806416.2806597","DOI":"10.1145\/2806416.2806597"},{"key":"735_CR8","unstructured":"Bhatia K, Jain H, Kar P, Varma M, Jain P (2015) Sparse local embeddings for extreme multi-label classification. In: Proceedings of the 29th annual conference on neural information processing systems (NIPS 2015), Montreal, CA, pp 730\u2013738"},{"key":"735_CR9","first-page":"993","volume":"3","author":"DM Blei","year":"2003","unstructured":"Blei DM, Ng AY, Jordan MI (2003) Latent Dirichlet allocation. J Mach Learn Res 3:993\u20131022","journal-title":"J Mach Learn Res"},{"key":"735_CR10","doi-asserted-by":"publisher","unstructured":"Blitzer J, McDonald R, Pereira F (2006) Domain adaptation with structural correspondence learning. In: Proceedings of the 4th conference on empirical methods in natural language processing (EMNLP 2006), Sydney, AU, pp 120\u2013128. https:\/\/doi.org\/10.3115\/1610075.1610094","DOI":"10.3115\/1610075.1610094"},{"key":"735_CR11","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1162\/tacl_a_00051","volume":"5","author":"P Bojanowski","year":"2017","unstructured":"Bojanowski P, Grave E, Joulin A, Mikolov T (2017) Enriching word vectors with subword information. Trans Assoc Comput Linguist 5:135\u2013146. https:\/\/doi.org\/10.1162\/tacl_a_00051","journal-title":"Trans Assoc Comput Linguist"},{"issue":"3","key":"735_CR12","doi-asserted-by":"publisher","first-page":"510","DOI":"10.3758\/bf03193020","volume":"39","author":"JA Bullinaria","year":"2007","unstructured":"Bullinaria JA, Levy JP (2007) Extracting semantic representations from word co-occurrence statistics: a computational study. Behav Res Methods 39(3):510\u2013526. https:\/\/doi.org\/10.3758\/bf03193020","journal-title":"Behav Res Methods"},{"key":"735_CR13","doi-asserted-by":"publisher","first-page":"743","DOI":"10.1613\/jair.1.11259","volume":"63","author":"J Camacho-Collados","year":"2018","unstructured":"Camacho-Collados J, Pilehvar MT (2018) From word to sense embeddings: a survey on vector representations of meaning. J Artif Intell Res 63:743\u2013788. https:\/\/doi.org\/10.1613\/jair.1.11259","journal-title":"J Artif Intell Res"},{"key":"735_CR14","doi-asserted-by":"publisher","unstructured":"Caruana R (1993) Multitask learning: A knowledge-based source of inductive bias. In: Proceedings of the 10th international conference on machine learning (ICML 1993), Amherst, US, pp 41\u201348. https:\/\/doi.org\/10.1016\/b978-1-55860-307-3.50012-5","DOI":"10.1016\/b978-1-55860-307-3.50012-5"},{"key":"735_CR15","first-page":"2493","volume":"12","author":"R Collobert","year":"2011","unstructured":"Collobert R, Weston J, Bottou L, Karlen M, Kavukcuoglu K, Kuksa P (2011) Natural language processing (almost) from scratch. J Mach Learn Res 12:2493\u20132537","journal-title":"J Mach Learn Res"},{"issue":"3","key":"735_CR16","first-page":"273","volume":"20","author":"C Cortes","year":"1995","unstructured":"Cortes C, Vapnik V (1995) Support vector networks. Mach Learn 20(3):273\u2013297","journal-title":"Mach Learn"},{"key":"735_CR17","unstructured":"Daum\u00e9 H (2007) Frustratingly easy domain adaptation. In: Proceedings of the 45th annual meeting of the association for computational linguistics (ACL 2007), Prague, CZ, pp 256\u2013263"},{"key":"735_CR18","doi-asserted-by":"publisher","unstructured":"Debole F, Sebastiani F (2003) Supervised term weighting for automated text categorization. In: Proceedings of the 18th ACM symposium on applied computing (SAC 2003), Melbourne, US, pp 784\u2013788. https:\/\/doi.org\/10.1145\/952532.952688","DOI":"10.1145\/952532.952688"},{"issue":"6","key":"735_CR19","doi-asserted-by":"publisher","first-page":"391","DOI":"10.1002\/(SICI)1097-4571(199009)41:6<391::AID-ASI1>3.0.CO;2-9","volume":"41","author":"S Deerwester","year":"1990","unstructured":"Deerwester S, Dumais ST, Furnas GW, Landauer TK, Harshman R (1990) Indexing by latent semantic analysis. J Am Soc Inf Sci 41(6):391\u2013407","journal-title":"J Am Soc Inf Sci"},{"key":"735_CR20","unstructured":"Devlin J, Chang M, Lee K, Toutanova K (2019) BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics (NAACL 2019), Minneapolis, US, pp 4171\u20134186"},{"key":"735_CR21","doi-asserted-by":"publisher","first-page":"30548","DOI":"10.1109\/access.2019.2954985","volume":"8","author":"Y Dong","year":"2020","unstructured":"Dong Y, Liu P, Zhu Z, Wang Q, Zhang Q (2020) A fusion model-based label embedding and self-interaction attention for text classification. IEEE Access 8:30548\u201330559. https:\/\/doi.org\/10.1109\/access.2019.2954985","journal-title":"IEEE Access"},{"key":"735_CR22","doi-asserted-by":"publisher","unstructured":"Dumais ST, Platt J, Heckerman D, Sahami M (1998) Inductive learning algorithms and representations for text categorization. In: Proceedings of the 7th ACM international conference on information and knowledge management (CIKM 1998), Bethesda, US, pp 148\u2013155. https:\/\/doi.org\/10.1145\/288627.288651","DOI":"10.1145\/288627.288651"},{"key":"735_CR23","first-page":"625","volume":"11","author":"D Erhan","year":"2010","unstructured":"Erhan D, Bengio Y, Courville A, Manzagol PA, Vincent P, Bengio S (2010) Why does unsupervised pre-training help deep learning? J Mach Learn Res 11:625\u2013660","journal-title":"J Mach Learn Res"},{"key":"735_CR24","doi-asserted-by":"publisher","unstructured":"Forman G (2004) A pitfall and solution in multi-class feature selection for text classification. In: Proceedings of the 21st international conference on machine learning (ICML 2004), Banff, CA, pp 38\u201345. https:\/\/doi.org\/10.1145\/1015330.1015356","DOI":"10.1145\/1015330.1015356"},{"key":"735_CR25","doi-asserted-by":"publisher","unstructured":"Garneau N, Leboeuf J, Lamontagne L (2019) Contextual generation of word embeddings for out-of-vocabulary words in downstream tasks. In: Proceedings of the 32nd Canadian conference on artificial intelligence (Canadian AI), Kingston, CA, pp 563\u2013569. https:\/\/doi.org\/10.1007\/978-3-030-18305-9_60","DOI":"10.1007\/978-3-030-18305-9_60"},{"key":"735_CR26","unstructured":"Glorot X, Bengio Y (2010) Understanding the difficulty of training deep feedforward neural networks. In: Proceedings of the 13th international conference on artificial intelligence and statistics (AISTATS 2010), Chia Laguna, Italy, pp 249\u2013256"},{"issue":"5","key":"735_CR27","doi-asserted-by":"publisher","first-page":"74:1","DOI":"10.1145\/3117807","volume":"50","author":"P Gonz\u00e1lez","year":"2017","unstructured":"Gonz\u00e1lez P, Casta\u00f1o A, Chawla NV, del Coz JJ (2017) A review on quantification learning. ACM Comput Surv 50(5):74:1\u201374:40. https:\/\/doi.org\/10.1145\/3117807","journal-title":"ACM Comput Surv"},{"key":"735_CR28","doi-asserted-by":"publisher","unstructured":"Grave E, Mikolov T, Joulin A, Bojanowski P (2017) Bag of tricks for efficient text classification. In: Proceedings of the 15th conference of the European chapter of the association for computational linguistics (EACL 2017), Valencia, ES, pp 427\u2013431. https:\/\/doi.org\/10.18653\/v1\/e17-2068","DOI":"10.18653\/v1\/e17-2068"},{"key":"735_CR29","doi-asserted-by":"publisher","first-page":"67","DOI":"10.3389\/fams.2019.00067","volume":"5","author":"S Gupta","year":"2019","unstructured":"Gupta S, Kanchinadam T, Conathan D, Fung G (2019) Task-optimized word embeddings for text classification representations. Front Appl Math Stat 5:67","journal-title":"Front Appl Math Stat"},{"issue":"2\u20133","key":"735_CR30","doi-asserted-by":"publisher","first-page":"146","DOI":"10.1007\/978-94-017-6059-1_36","volume":"10","author":"ZS Harris","year":"1954","unstructured":"Harris ZS (1954) Distributional structure. Word 10(2\u20133):146\u2013162. https:\/\/doi.org\/10.1007\/978-94-017-6059-1_36","journal-title":"Word"},{"key":"735_CR31","doi-asserted-by":"publisher","unstructured":"Hersh W, Buckley C, Leone T, Hickman D (1994) OHSUMED: an interactive retrieval evaluation and new large text collection for research. In: Proceedings of the 17th ACM international conference on research and development in information retrieval (SIGIR 1994), Dublin, IE, pp 192\u2013201. https:\/\/doi.org\/10.1007\/978-1-4471-2099-5_20","DOI":"10.1007\/978-1-4471-2099-5_20"},{"issue":"8","key":"735_CR32","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780","journal-title":"Neural Comput"},{"key":"735_CR33","unstructured":"Hsu DJ, Kakade SM, Langford J, Zhang T (2009) Multi-label prediction via compressed sensing. In: Proceedings of the 23rd annual conference on neural information processing systems (NIPS 2009), Vancouver, CA, pp 772\u2013780"},{"issue":"1","key":"735_CR34","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1007\/s00521-016-2401-x","volume":"29","author":"M Jiang","year":"2018","unstructured":"Jiang M, Liang Y, Feng X, Fan X, Pei Z, Xue Y, Guan R (2018) Text classification based on deep belief network and softmax regression. Neural Comput Appl 29(1):61\u201370. https:\/\/doi.org\/10.1007\/s00521-016-2401-x","journal-title":"Neural Comput Appl"},{"key":"735_CR35","unstructured":"Jin P, Zhang Y, Chen X, Xia Y (2016) Bag-of-embeddings for text classification. In: Proceedings of the 26th international joint conference on artificial intelligence (IJCAI 2016), New York, US, pp 2824\u20132830"},{"key":"735_CR36","doi-asserted-by":"publisher","unstructured":"Joachims T (1998) Text categorization with support vector machines: learning with many relevant features. In: Proceedings of the 10th European conference on machine learning (ECML 1998), Chemnitz, DE, pp 137\u2013142. https:\/\/doi.org\/10.1007\/bfb0026683","DOI":"10.1007\/bfb0026683"},{"key":"735_CR37","doi-asserted-by":"publisher","unstructured":"Joachims T (2001) A statistical learning model of text classification for support vector machines. In: Proceedings of the 24th ACM conference on research and development in information retrieval (SIGIR 2001), New Orleans, US, pp 128\u2013136. https:\/\/doi.org\/10.1145\/383952.383974","DOI":"10.1145\/383952.383974"},{"key":"735_CR38","doi-asserted-by":"crossref","unstructured":"Kim Y (2014) Convolutional neural networks for sentence classification. In: Proceedings of the 2014 conference on empirical methods in natural language processing (EMNLP 2014), Doha, QA, pp 1746\u20131751","DOI":"10.3115\/v1\/D14-1181"},{"key":"735_CR39","doi-asserted-by":"crossref","unstructured":"Kim Y, Jernite Y, Sontag D, Rush AM (2016) Character-aware neural language models. In: Proceedings of the 30th AAAI conference on artificial intelligence (AAAI 2016), Phoenix, US, pp 2741\u20132749","DOI":"10.1609\/aaai.v30i1.10362"},{"key":"735_CR40","unstructured":"Kingma DP, Ba J (2015) Adam: a method for stochastic optimization. In: Proceedings of the 3rd international conference on learning representations (ICLR 2015), San Diego, US"},{"key":"735_CR41","doi-asserted-by":"crossref","unstructured":"Lai S, Xu L, Liu K, Zhao J (2015) Recurrent convolutional neural networks for text classification. In: Proceedings of the 29th AAAI conference on artificial intelligence (AAAI 2015), Austin, US, pp 2267\u20132273","DOI":"10.1609\/aaai.v29i1.9513"},{"issue":"4","key":"735_CR42","doi-asserted-by":"publisher","first-page":"721","DOI":"10.1109\/TPAMI.2008.110","volume":"31","author":"M Lan","year":"2009","unstructured":"Lan M, Tan CL, Su J, Lu Y (2009) Supervised and traditional term weighting methods for automatic text categorization. IEEE Trans Pattern Anal Mach Intell 31(4):721\u2013735","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"735_CR43","unstructured":"Le HT, Cerisara C, Denis A (2018) Do convolutional networks need to be deep for text classification?. In: Proceedings of the AAAI 2018 workshop on affective content analysis, New Orleans, US, pp 29\u201336"},{"issue":"7553","key":"735_CR44","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun Y, Bengio Y, Hinton G (2015) Deep learning. Nature 521(7553):436\u2013444","journal-title":"Nature"},{"key":"735_CR45","doi-asserted-by":"crossref","unstructured":"Lei X, Cai Y, Xu J, Ren D, Li Q, Leung HF (2019) Incorporating task-oriented representation in text classification. In: Proceedings of the 24th international conference on database systems for advanced applications (DASFAA 2019), Chiang Mai, TH, pp 401\u2013415","DOI":"10.1007\/978-3-030-18579-4_24"},{"key":"735_CR46","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1162\/tacl_a_00134","volume":"3","author":"O Levy","year":"2015","unstructured":"Levy O, Goldberg Y, Dagan I (2015) Improving distributional similarity with lessons learned from word embeddings. Trans Assoc Comput Linguist 3:211\u2013225","journal-title":"Trans Assoc Comput Linguist"},{"key":"735_CR47","unstructured":"Levy O, Goldberg Y (2014) Neural word embedding as implicit matrix factorization. In: Proceedings of the 28th annual conference on neural information processing systems (NIPS 2014), Montreal, CA, pp 2177\u20132185"},{"key":"735_CR48","doi-asserted-by":"crossref","unstructured":"Lewis DD (1992) An evaluation of phrasal and clustered representations on a text categorization task. In: Proceedings of the 15th ACM international conference on research and development in information retrieval (SIGIR 1992), Kobenhavn, DK, pp 37\u201350","DOI":"10.1145\/133160.133172"},{"issue":"1","key":"735_CR49","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1145\/3308774.3308781","volume":"52","author":"J Lin","year":"2019","unstructured":"Lin J (2019) The neural hype and comparisons against weak baselines. SIGIR Forum 52(1):40\u201351","journal-title":"SIGIR Forum"},{"key":"735_CR50","doi-asserted-by":"crossref","unstructured":"Luong T, Pham H, Manning CD (2015) Effective approaches to attention-based neural machine translation. In: Proceedings of the 2015 conference on empirical methods in natural language processing (EMNLP 2015), Lisbon, PT, pp 1412\u20131421","DOI":"10.18653\/v1\/D15-1166"},{"key":"735_CR51","unstructured":"McCann B, Bradbury J, Xiong C, Socher R (2017) Learned in translation: contextualized word vectors. In: Proceedings of the 31st annual conference on neural information processing systems (NIPS 2017), Long Beach, US, pp 6294\u20136305"},{"key":"735_CR52","unstructured":"Mikolov T, Chen K, Corrado G, Dean J (2013a) Efficient estimation of word representations in vector space. In: Workshop track proceedings of the 1st international conference on learning representations (ICLR 2013), Scottsdale, US"},{"key":"735_CR53","unstructured":"Mikolov T, Grave E, Bojanowski P, Puhrsch C, Joulin A (2018) Advances in pre-training distributed word representations. In: Proceedings of the 11th international conference on language resources and evaluation (LREC 2018), Miyazaki, JP"},{"key":"735_CR54","unstructured":"Mikolov T, Sutskever I, Chen K, Corrado GS, Dean J (2013b) Distributed representations of words and phrases and their compositionality. In: Proceedings of the 27th annual conference on neural information processing systems (NIPS 2013), Lake Tahoe, US, pp 3111\u20133119"},{"key":"735_CR55","unstructured":"Mnih A, Kavukcuoglu K (2013) Learning word embeddings efficiently with noise-contrastive estimation. In: Proceedings of the 27th annual conference on neural information processing systems (NIPS 2013), Lake Tahoe, US, pp 2265\u20132273"},{"key":"735_CR56","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1613\/jair.4762","volume":"55","author":"A Moreo","year":"2016","unstructured":"Moreo A, Esuli A, Sebastiani F (2016) Distributional correspondence indexing for cross-lingual and cross-domain sentiment classification. J Artif Intell Res 55:131\u2013163","journal-title":"J Artif Intell Res"},{"issue":"2","key":"735_CR57","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1109\/TKDE.2018.2883446","volume":"32","author":"A Moreo","year":"2020","unstructured":"Moreo A, Esuli A, Sebastiani F (2020) Learning to weight for text classification. IEEE Trans Knowl Data Eng 32(2):302\u2013316. https:\/\/doi.org\/10.1109\/TKDE.2018.2883446","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"735_CR58","doi-asserted-by":"publisher","unstructured":"Moreo A, Pedrotti A, Sebastiani F (2021) Heterogeneous document embeddings for cross-lingual text classification. In: Proceedings of the 36th ACM symposium on applied computing (SAC 2021), Gwangju, KR. https:\/\/doi.org\/10.1145\/3412841.3442093(forthcoming)","DOI":"10.1145\/3412841.3442093"},{"key":"735_CR59","unstructured":"Morik K, Brockhausen P, Joachims T (1999) Combining statistical learning with a knowledge-based approach. A case study in intensive care monitoring. In: Proceedings of the 16th international conference on machine learning (ICML 1999), Bled, SL, pp 268\u2013277"},{"issue":"1\/2","key":"735_CR60","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1561\/1500000011","volume":"2","author":"B Pang","year":"2008","unstructured":"Pang B, Lee L (2008) Opinion mining and sentiment analysis. Found Trends Inf Retr 2(1\/2):1\u2013135","journal-title":"Found Trends Inf Retr"},{"key":"735_CR61","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1162\/tacl_a_00259","volume":"7","author":"N Pappas","year":"2019","unstructured":"Pappas N, Henderson J (2019) Gile: a generalized input-label embedding for text classification. Trans Assoc Comput Linguist 7:139\u2013155","journal-title":"Trans Assoc Comput Linguist"},{"key":"735_CR62","doi-asserted-by":"crossref","unstructured":"Pennington J, Socher R, Manning C (2014) Glove: global vectors for word representation. In: Proceedings of the 2014 conference on empirical methods in natural language processing (EMNLP 2014), Doha, QA, pp 1532\u20131543","DOI":"10.3115\/v1\/D14-1162"},{"key":"735_CR63","doi-asserted-by":"crossref","unstructured":"Peters ME, Neumann M, Iyyer M, Gardner M, Clark C, Lee K, Zettlemoyer L (2018) Deep contextualized word representations. In: Proceedings of the 2018 conference of the North American chapter of the association for computational linguistics (NAACL 2018), New Orleans, US, pp 2227\u20132237","DOI":"10.18653\/v1\/N18-1202"},{"key":"735_CR64","doi-asserted-by":"crossref","unstructured":"Ren H, Zeng Z, Cai Y, Du Q, Li Q, Xie H (2019) A weighted word embedding model for text classification. In: Proceedings of the 24th international conference on database systems for advanced applications (DASFAA 2019), Chiang Mai, TH, pp 419\u2013434","DOI":"10.1007\/978-3-030-18576-3_25"},{"key":"735_CR65","unstructured":"Riloff E, Wiebe J, Phillips W (2005) Exploiting subjectivity classification to improve information extraction. In: Proceedings of the 12th conference of the american association for artificial intelligence (AAAI 2005), Pittsburgh, US, pp 1106\u20131111"},{"issue":"6088","key":"735_CR66","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1038\/323533a0","volume":"323","author":"DE Rumelhart","year":"1986","unstructured":"Rumelhart DE, Hinton GE, Williams RJ (1986) Learning representations by back-propagating errors. Nature 323(6088):533\u2013536. https:\/\/doi.org\/10.1038\/323533a0","journal-title":"Nature"},{"issue":"1","key":"735_CR67","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1162\/089976602753284446","volume":"14","author":"M Saerens","year":"2002","unstructured":"Saerens M, Latinne P, Decaestecker C (2002) Adjusting the outputs of a classifier to new a priori probabilities: a simple procedure. Neural Comput 14(1):21\u201341. https:\/\/doi.org\/10.1162\/089976602753284446","journal-title":"Neural Comput"},{"key":"735_CR68","unstructured":"Sahlgren M (2005) An introduction to random indexing. In: Proceedings of the TKE workshop on methods and applications of semantic indexing, Copenhagen, DK"},{"key":"735_CR69","unstructured":"Socher R, Perelygin A, Wu J, Chuang J, Manning CD, Ng A, Potts C (2013) Recursive deep models for semantic compositionality over a sentiment treebank. In: Proceedings of the 2013 conference on empirical methods in natural language processing (EMNLP 2013), Seattle, US, pp 1631\u20131642"},{"key":"735_CR70","unstructured":"Soucy P, Mineau GW (2005) Beyond TFIDF weighting for text categorization in the vector space model. In: Proceedings of the 19th international joint conference on artificial intelligence (IJCAI 2005), Edinburgh, UK, pp 1130\u20131135"},{"key":"735_CR71","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton G, Krizhevsky A, Sutskever I, Salakhutdinov R (2014) Dropout: a simple way to prevent neural networks from overfitting. J Mach Learn Res 15:1929\u20131958","journal-title":"J Mach Learn Res"},{"key":"735_CR72","unstructured":"Steinberger R, Pouliquen B, Widiger A, Ignat C, Erjavec T, Tufis D, Varga D (2006) The JRC-Acquis: a multilingual aligned parallel corpus with 20+ languages. In: Proceedings of the 5th international conference on language resources and evaluation (LREC 2006), Genova, IT, pp 2142\u20132147"},{"key":"735_CR73","doi-asserted-by":"crossref","unstructured":"Tang J, Qu M, Mei Q (2015) PTE: Predictive text embedding through large-scale heterogeneous text networks. In: Proceedings of the 21st ACM international conference on knowledge discovery and data mining (KDD 2015), Sydney, AU, pp 1165\u20131174","DOI":"10.1145\/2783258.2783307"},{"key":"735_CR74","first-page":"2579","volume":"9","author":"L van der Maaten","year":"2008","unstructured":"van der Maaten L, Hinton G (2008) Visualizing data using t-SNE. J Mach Learn Res 9:2579\u20132605","journal-title":"J Mach Learn Res"},{"key":"735_CR75","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I (2017) Attention is all you need. In: Proceedings of the 31st annual conference on neural information processing systems (NIPS 2017), Long Beach, US, pp 5998\u20136008"},{"key":"735_CR76","doi-asserted-by":"crossref","unstructured":"Wang G, Li C, Wang W, Zhang Y, Shen D, Zhang X, Henao R, Carin L (2018) Joint embedding of words and labels for text classification. In: Proceedings of the 56th annual meeting of the association for computational linguistics (ACL 2018), Melbourne, AU, pp 2321\u20132331","DOI":"10.18653\/v1\/P18-1216"},{"key":"735_CR77","unstructured":"Wang S, Manning CD (2012) Baselines and bigrams: simple, good sentiment and topic classification. In: Proceedings of the 50th annual meeting of the association for computational linguistics (ACL 2012), Jeju Island, KR, pp 90\u201394"},{"issue":"3","key":"735_CR78","doi-asserted-by":"publisher","first-page":"252","DOI":"10.1145\/183422.183424","volume":"12","author":"Y Yang","year":"1994","unstructured":"Yang Y, Chute CG (1994) An example-based mapping method for text categorization and retrieval. ACM Trans Inf Syst 12(3):252\u2013277","journal-title":"ACM Trans Inf Syst"},{"key":"735_CR79","unstructured":"Yang Z, Dai Z, Yang Y, Carbonell JG, Salakhutdinov R, Le QV (2019b) XLNet: generalized autoregressive pretraining for language understanding. In: Proceedings of the 33rd annual conference on neural information processing systems (NeurIPS 2019), Vancouver, CA, pp 5754\u20135764"},{"key":"735_CR80","doi-asserted-by":"publisher","unstructured":"Yang W, Lu K, Yang P, Lin J (2019a) Critically examining the \u201cneural hype\u201d: weak baselines and the additivity of effectiveness gains from neural ranking models. In: Proceedings of the 42nd ACM conference on research and development in information retrieval (SIGIR 2019), Paris, FR, pp 1129\u20131132. https:\/\/doi.org\/10.1145\/3331184.3331340","DOI":"10.1145\/3331184.3331340"},{"key":"735_CR81","doi-asserted-by":"publisher","unstructured":"Yao S, Yu D, Xiao K (2019) Enhancing domain word embedding via latent semantic imputation. In: Proceedings of the 25th ACM conference on knowledge discovery and data mining (KDD 2019), Anchorage, US, pp 557\u2013565. https:\/\/doi.org\/10.1145\/3292500.3330926","DOI":"10.1145\/3292500.3330926"},{"key":"735_CR82","unstructured":"Yu HF, Jain P, Kar P, Dhillon I (2014) Large-scale multi-label learning with missing labels. In: Proceedings of the 31st international conference on machine learning (ICML 2014), Beijing, CN, pp 593\u2013601"},{"issue":"4","key":"735_CR83","doi-asserted-by":"publisher","first-page":"e1253","DOI":"10.1002\/widm.1253","volume":"8","author":"L Zhang","year":"2018","unstructured":"Zhang L, Wang S, Liu B (2018) Deep learning for sentiment analysis: a survey. Wiley Interdiscip Rev Data Min Knowl Discov 8(4):e1253. https:\/\/doi.org\/10.1002\/widm.1253","journal-title":"Wiley Interdiscip Rev Data Min Knowl Discov"},{"key":"735_CR84","unstructured":"Zhang X, Zhao J, LeCun Y (2015) Character-level convolutional networks for text classification. In: Proceedings of the 29th annual conference on neural information processing systems (NIPS 2015), Montreal, CA, pp 649\u2013657"}],"container-title":["Data Mining and Knowledge Discovery"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-020-00735-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10618-020-00735-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-020-00735-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,18]],"date-time":"2022-12-18T07:17:40Z","timestamp":1671347860000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10618-020-00735-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,2,19]]},"references-count":84,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2021,5]]}},"alternative-id":["735"],"URL":"https:\/\/doi.org\/10.1007\/s10618-020-00735-3","relation":{},"ISSN":["1384-5810","1573-756X"],"issn-type":[{"type":"print","value":"1384-5810"},{"type":"electronic","value":"1573-756X"}],"subject":[],"published":{"date-parts":[[2021,2,19]]},"assertion":[{"value":"9 September 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 December 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 February 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}