{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T19:07:52Z","timestamp":1722539272441},"reference-count":80,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2018,9,26]],"date-time":"2018-09-26T00:00:00Z","timestamp":1537920000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100004329","name":"Javna Agencija za Raziskovalno Dejavnost RS","doi-asserted-by":"publisher","award":["J6\u20136842","J7-8280"],"id":[{"id":"10.13039\/501100004329","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Lang Resources & Evaluation"],"published-print":{"date-parts":[[2020,3]]},"DOI":"10.1007\/s10579-018-9425-z","type":"journal-article","created":{"date-parts":[[2018,9,26]],"date-time":"2018-09-26T06:45:35Z","timestamp":1537944335000},"page":"223-246","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["The Janes project: language resources and tools for Slovene user generated content"],"prefix":"10.1007","volume":"54","author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-9956-1689","authenticated-orcid":false,"given":"Darja","family":"Fi\u0161er","sequence":"first","affiliation":[]},{"given":"Nikola","family":"Ljube\u0161i\u0107","sequence":"additional","affiliation":[]},{"given":"Toma\u017e","family":"Erjavec","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,9,26]]},"reference":[{"key":"9425_CR1","unstructured":"Arhar\u00a0Holdt, \u0160., Erjavec, T., & Fi\u0161er, D. (2017). CMC training corpus Janes-Syn 1.0. Slovenian language resource repository CLARIN.SI. http:\/\/hdl.handle.net\/11356\/1086 ."},{"key":"9425_CR2","unstructured":"Arhar Holdt, \u0160., Fi\u0161er, D., Erjavec, T., & Krek, S. (2016). Syntactic annotation of Slovene CMC: First steps. In Proceedings of the 4th conference on CMC and social media corpora for the humanities (pp. 3\u20136)."},{"key":"9425_CR3","doi-asserted-by":"crossref","unstructured":"Barbieri, F., Basile, V., Croce, D., Nissim, M., Novielli, N., & Patti, V. (2016). Overview of the EVALITA 2016 SENTiment POLarity classification task. In Proceedings of third Italian conference on computational linguistics (CLiC-it 2016) & fifth evaluation campaign of natural language processing and speech tools for Italian. Final Workshop (EVALITA 2016).","DOI":"10.4000\/books.aaccademia.1992"},{"key":"9425_CR4","unstructured":"Baron, A., & Rayson, P. (2008). VARD 2: A tool for dealing with spelling variation in historical corpora. In: Proceedings of the postgraduate conference in corpus linguistics. Birmingham: Aston University."},{"issue":"1","key":"9425_CR5","doi-asserted-by":"crossref","first-page":"157","DOI":"10.21248\/jlcl.28.2013.172","volume":"28","author":"T Bartz","year":"2014","unstructured":"Bartz, T., Bei\u00dfwenger, M., & Storrer, A. (2014). Optimierung des Stuttgart\u2013T\u00fcbingen\u2013Tagset f\u00fcr die linguistische Annotation von Korpora zur internetbasierten Kommunikation: Ph\u00e4nomene, Herausforderungen, Erweiterungsvorschl\u00e4ge. Journal for Language Technology and Computational Linguistics, 28(1), 157\u2013198.","journal-title":"Journal for Language Technology and Computational Linguistics"},{"key":"9425_CR6","unstructured":"Bei\u00dfwenger, M., Ehrhardt, E., Horbach, A., L\u00fcngen, H., Steffen, D., & Storrer, A. (2015). Adding value to CMC corpora: CLARINification and part-of-speech annotation of the Dortmund chat corpus. In Proceedings of the 2nd workshop on natural language processing for computer-mediated communication\/social media (NLP4CMC2015) (pp. 12\u201316)."},{"key":"9425_CR7","doi-asserted-by":"crossref","unstructured":"Bei\u00dfwenger, M., Ermakova, M., Geyken, A., Lemnitzer, L., & Storrer, A. (2012). A TEI schema for the representation of computer-mediated communication. Journal of the Text Encoding Initiative, 3.","DOI":"10.4000\/jtei.476"},{"key":"9425_CR8","first-page":"292","volume-title":"Corpus linguistics: An international handbook","author":"M Bei\u00dfwenger","year":"2008","unstructured":"Bei\u00dfwenger, M., & Storrer, A. (2008). Corpora of computer-mediated communication. In A. L\u00fcdeling & M. Kyto (Eds.), Corpus linguistics: An international handbook (pp. 292\u2013308). Berlin: Mouton de Gruyter."},{"key":"9425_CR9","doi-asserted-by":"crossref","unstructured":"Bollmann, M., Bingel, J., & S\u00f8gaard, A. (2017). Learning attention for historical text normalization by learning to pronounce. In ACL (Vol. 1, pp. 332\u2013344). Association for Computational Linguistics.","DOI":"10.18653\/v1\/P17-1031"},{"issue":"4","key":"9425_CR10","first-page":"467","volume":"18","author":"PF Brown","year":"1992","unstructured":"Brown, P. F., Desouza, P. V., Mercer, R. L., Pietra, V. J. D., & Lai, J. C. (1992). Class-based n-gram models of natural language. Computational Linguistics, 18(4), 467\u2013479.","journal-title":"Computational Linguistics"},{"issue":"2","key":"9425_CR11","doi-asserted-by":"crossref","first-page":"1","DOI":"10.21248\/jlcl.29.2014.187","volume":"29","author":"T Chanier","year":"2014","unstructured":"Chanier, T., Poudat, C., Sagot, B., Antoniadis, G., Wigham, C. R., Hriba, L., et al. (2014). The CoMeRe corpus for French: Structuring and annotating heterogeneous CMC genres. JLCL-Journal for Language Technology and Computational Linguistics, 29(2), 1\u201330.","journal-title":"JLCL-Journal for Language Technology and Computational Linguistics"},{"key":"9425_CR12","unstructured":"Derczynski, L., Bontcheva, K., & Roberts, I. (2016). Broad twitter corpus: A diverse named entity recognition resource. In COLING (pp. 1169\u20131179)."},{"issue":"2","key":"9425_CR13","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1016\/j.ipm.2014.10.006","volume":"51","author":"L Derczynski","year":"2015","unstructured":"Derczynski, L., Maynard, D., Rizzo, G., van Erp, M., Gorrell, G., Troncy, R., et al. (2015). Analysis of named entity recognition and linking for tweets. Information Processing & Management, 51(2), 32\u201349.","journal-title":"Information Processing & Management"},{"key":"9425_CR14","unstructured":"Dobrovoljc, K., Krek, S., Holozan, P., Erjavec, T., & Romih, M. (2015). Morphological lexicon Sloleks 1.2. Slovenian language resource repository CLARIN.SI. http:\/\/hdl.handle.net\/11356\/1039 ."},{"key":"9425_CR15","unstructured":"Dobrovoljc, K., Krek, S., & Rupnik, J. (2012). Skladenjski raz\u010dlenjevalnik za sloven\u0161\u010dino [A syntax parser for Slovene]. In Proceedings of the eight conference on language technologies (pp. 42\u201347). Jo\u017eef Stefan Institute. http:\/\/nl.ijs.si\/isjt12\/proceedings\/index-en.html ."},{"key":"9425_CR16","doi-asserted-by":"publisher","first-page":"299","DOI":"10.1093\/acprof:oso\/9780199795437.003.0014","volume-title":"Digital discourse. Language in the new media","author":"C D\u00fcrscheid","year":"2011","unstructured":"D\u00fcrscheid, C., & Stark, E. (2011). SMS4science: An international corpus-based texting project and the specific challenges for multilingual Switzerland. In C. Thurlow & K. Mroczek (Eds.), Digital discourse. Language in the new media (pp. 299\u2013320). Oxford: Oxford University Press."},{"issue":"1","key":"9425_CR17","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1007\/s10579-011-9174-8","volume":"46","author":"T Erjavec","year":"2012","unstructured":"Erjavec, T. (2012). MULTEXT-East: Morphosyntactic resources for Central and Eastern European languages. Language Resources and Evaluation, 46(1), 131\u2013142.","journal-title":"Language Resources and Evaluation"},{"issue":"3","key":"9425_CR18","doi-asserted-by":"publisher","first-page":"753","DOI":"10.1007\/s10579-015-9294-7","volume":"49","author":"T Erjavec","year":"2015","unstructured":"Erjavec, T. (2015). The IMP historical Slovene language resources. Language Resources and Evaluation, 49(3), 753\u2013775.","journal-title":"Language Resources and Evaluation"},{"key":"9425_CR19","unstructured":"Erjavec, T., Arhar Holdt, \u0160., \u010cibej, J., Dobrovoljc, K., Fi\u0161er, D., Laskowski, C., & Zupan, K. (2016). Annotating CLARIN.SI TEI corpora with WebAnno. In Proceedings of the CLARIN annual conference (pp. 1\u20135)."},{"key":"9425_CR20","unstructured":"Erjavec, T., \u010cibej, J., Arhar Holdt, \u0160., Ljube\u0161i\u0107, N., & Fi\u0161er, D. (2016). Gold-standard datasets for annotation of Slovene computer-mediated communication. In Proceedings of RASLAN 2016: Recent advances in Slavonic natural language processing (pp. 29\u201340). Brno: Tribun EU."},{"key":"9425_CR21","unstructured":"Erjavec, T., Fi\u0161er, D., Krek, S., & Ledinek, N. (2010). The JOS linguistically tagged corpus of Slovene. In Proceedings of the seventh international conference on language resources and evaluation (LREC\u201910). Valletta: European Language Resources Association (ELRA)."},{"key":"9425_CR22","unstructured":"Erjavec, T., Fi\u0161er, D., \u010cibej, J., & Arhar\u00a0Holdt, \u0160. (2016). CMC training corpus Janes-Norm 1.2. Slovenian language resource repository CLARIN.SI. http:\/\/hdl.handle.net\/11356\/1084 ."},{"key":"9425_CR23","unstructured":"Erjavec, T., Fi\u0161er, D., \u010cibej, J., Arhar\u00a0Holdt, \u0160., & Ljube\u0161i\u0107, N. (2016). CMC training corpus Janes-Tag 1.2. Slovenian language resource repository CLARIN.SI. http:\/\/hdl.handle.net\/11356\/1085 ."},{"key":"9425_CR24","unstructured":"Erjavec, T., Ignat, C., Poliquen, B., & Steinberger, R. (2005). Massive multilingual corpus compilation: Acquis communautaire and ToTaLe. In The 2nd language & technology conference: Human language technologies as a challenge for computer science and linguistics. Association for Computing Machinery (ACM) and UAM Fundacja."},{"issue":"1","key":"9425_CR25","first-page":"35","volume":"39","author":"T Erjavec","year":"2015","unstructured":"Erjavec, T., Ljube\u0161i\u0107, N., & Logar, N. (2015). The slWaC corpus of the Slovene web. Informatica, 39(1), 35.","journal-title":"Informatica"},{"key":"9425_CR26","unstructured":"Erjavec, T., Ljube\u0161i\u0107, N., & Fi\u0161er, D. (2017). Blog post and comment corpus Janes-Blog 1.0. Slovenian language resource repository CLARIN.SI. http:\/\/hdl.handle.net\/11356\/1138 ."},{"key":"9425_CR27","unstructured":"Erjavec, T., Ljube\u0161i\u0107, N., & Fi\u0161er, D. (2017). Forum corpus Janes-Forum 1.0. Slovenian language resource repository CLARIN.SI. http:\/\/hdl.handle.net\/11356\/1139 ."},{"key":"9425_CR28","unstructured":"Erjavec, T., Ljube\u0161i\u0107, N., & Fi\u0161er, D. (2017). News comment corpus Janes-News 1.0. Slovenian language resource repository CLARIN.SI. http:\/\/hdl.handle.net\/11356\/1140 ."},{"key":"9425_CR29","doi-asserted-by":"crossref","unstructured":"Fi\u0161er, D., Erjavec, T., & Ljube\u0161i\u0107, N. (2017). Legal framework, dataset and annotation schema for socially unacceptable online discourse practices in Slovene. In Proceedings of the first workshop on abusive language online (pp. 46\u201351).","DOI":"10.18653\/v1\/W17-3007"},{"key":"9425_CR30","unstructured":"Fi\u0161er, D. (2018). Viri, orodja in metode za analizo spletne sloven\u0161\u010dine. Znanstvena zalo\u017eba Filozofske fakultete Univerze v Ljubljani."},{"key":"9425_CR31","unstructured":"Fi\u0161er, D., Smailovi\u0107, J., Erjavec, T., Gr\u010dar, M., & Mozeti\u010d, I. (2016). Sentiment annotation of the Janes corpus of Slovene user-generated content. In Proceedings of the conference on language technologies and digital humanities (pp. 65\u201370). Ljubljana: Academic Publishing Division of the Faculty of Arts."},{"key":"9425_CR32","doi-asserted-by":"crossref","unstructured":"Frey, J. C., Glaznieks, A., & Stemle, E. W. (2016). The DiDi corpus of south Tyrolean CMC data: A multilingual corpus of Facebook texts. In CLIC-it.","DOI":"10.4000\/books.aaccademia.1782"},{"key":"9425_CR33","unstructured":"Gimpel, K., Schneider, N., O\u2019Connor, B., Das, D., Mills, D., Eisenstein, J., Heilman, M., Yogatama, D., Flanigan, J., & Smith, N. A. (2011). Part-of-speech tagging for twitter: Annotation, features, and experiments. In Proceedings of the 49th annual meeting of the association for computational linguistics: Human language technologies: Short papers, HLT \u201911 (Vol. 2, pp. 42\u201347). Stroudsburg: Association for Computational Linguistics."},{"key":"9425_CR34","unstructured":"Goli, T., Osrajnik, E., Fi\u0161er, D., & Erjavec, T. (2017). CMC shortening corpus Janes-Kratko 1.0. Slovenian language resource repository CLARIN.SI. http:\/\/hdl.handle.net\/11356\/1087 ."},{"key":"9425_CR35","unstructured":"Goli, T., Osrajnik, E., & Fi\u0161er, D. (2016). Analiza kraj\u0161anja slovenskih sporo\u010dil na dru\u017ebenem omre\u017eju Twitter. In T.\u00a0Erjavec, D.\u00a0Fi\u0161er (Eds.), Proceedings of the conference on language technologies and digital humanities (pp. 77\u201382). Ljubljana: Academic Publishing Division of the Faculty of Arts."},{"key":"9425_CR36","unstructured":"Gr\u010dar, M., Krek, S., & Dobrovoljc, K. (2012). Obeliks: statisti\u010dni oblikoskladenjski ozna\u010devalnik in lematizator za slovenski jezik [obeliks: a statistical morphosyntactic tagger and lemmatiser for slovene]. Ljubljana, Slovenia: In Zbornik Osme konference Jezikovne tehnologije."},{"key":"9425_CR37","unstructured":"Holozan, P., Krek, S., Pivec, M., Riga\u010d, S., Rozman, S., & Velu\u0161\u010dek, A. (2008). Specifikacije za u\u010dni korpus. Projekt \u201cSporazumevanje v slovenskem jeziku\u201d [Specifications for the training corpus. The \u201ccommunication in Slovene\u201d project]. Technical report, Amebis, d.o.o. http:\/\/www.slovenscina.eu\/Vsebine\/Sl\/Kazalniki\/K2.aspx ."},{"key":"9425_CR38","unstructured":"Horsmann, T., & Zesch, T. (2016). Building a social media adapted pos tagger using flextag: A case study on Italian Tweets. In Fifth evaluation campaign of natural language processing and speech tools for Italian: EVALITA 2016 (pp. 95\u201398). Naples. http:\/\/www.ltl.uni-due.de\/wp-content\/uploads\/horsmannZesch_evalita2016.pdf.pdf ."},{"key":"9425_CR39","unstructured":"Huang, Z., Xu, W., & Yu, K. (2015). Bidirectional LSTM-CRF models for sequence tagging. CoRR arXiv:abs\/1508.01991 ."},{"key":"9425_CR40","unstructured":"Johansson, R., Adesam, Y., Bouma, G., & Hedberg, K. (2016). A multi-domain corpus of Swedish word sense annotation. In LREC."},{"key":"9425_CR41","unstructured":"Koehn, P., & Knowles, R. (2017). Six challenges for neural machine translation. In Proceedings of the first workshop on neural machine translation (pp. 28\u201339). Association for Computational Linguistics. http:\/\/aclweb.org\/anthology\/W17-3204 ."},{"key":"9425_CR42","unstructured":"Krek, S., Erjavec, T., Dobrovoljc, K., Mo\u017ee, S., Ledinek, N., & Holz, N. (2013). Training corpus ssj500k 1.3. Slovenian language resource repository CLARIN.SI. http:\/\/hdl.handle.net\/11356\/1029 ."},{"key":"9425_CR43","volume-title":"Content analysis: An introduction to its methodology","author":"K Krippendorff","year":"2012","unstructured":"Krippendorff, K. (2012). Content analysis: An introduction to its methodology (3rd ed.). Thousand Oaks, CA: Sage Publications.","edition":"3"},{"key":"9425_CR44","unstructured":"Lagus, K. H., Ruckenstein, M. S., Pantzar, M., & Ylisiurua, M. J., et\u00a0al. (2016). Suomi24. Helsingin yliopisto."},{"key":"9425_CR45","unstructured":"Lebar, L., Petrov\u010di\u010d, A., & Petri\u010d, G. (2012). Analiza slovenskih spletnih forumov. poro\u010dilo."},{"key":"9425_CR46","series-title":"Mining opinions, sentiments, and emotions","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139084789","volume-title":"Sentiment analysis","author":"B Liu","year":"2015","unstructured":"Liu, B. (2015). Sentiment analysis., Mining opinions, sentiments, and emotions Cambridge: Cambridge University Press."},{"key":"9425_CR47","unstructured":"Ljube\u0161i\u0107, N., & Erjavec, T. (2016). Corpus vs. lexicon supervision in morphosyntactic tagging: The case of slovene. In Proceedings of the tenth international conference on language resources and evaluation (LREC 2016). Paris: European Language Resources Association (ELRA)."},{"key":"9425_CR48","unstructured":"Ljube\u0161i\u0107, N., Erjavec, T., & Fi\u0161er, D. (2016). Corpus-based diacritic restoration for south slavic languages. In Proceedings of the tenth international conference on language resources and evaluation (LREC 2016). Paris: European Language Resources Association (ELRA)."},{"key":"9425_CR49","unstructured":"Ljube\u0161i\u0107, N. (2018). Comparing CRF and LSTM performance on the task of morphosyntactic tagging of non-standard varieties of South Slavic languages. In Proceedings of the fifth workshop on NLP for similar languages, varieties and dialects (VarDial). Santa Fe, USA."},{"key":"9425_CR50","unstructured":"Ljube\u0161i\u0107, N., Erjavec, T., & Fi\u0161er, D. (2014). Standardizing tweets with character-level machine translation. In Proceedings of CICLing 2014 (pp. 164\u201375). Lecture notes in computer science. Kathmandu: Springer."},{"key":"9425_CR51","doi-asserted-by":"crossref","unstructured":"Ljube\u0161i\u0107, N., Erjavec, T., & Fi\u0161er, D. (2017). Adapting a state-of-the-art tagger for south Slavic languages to non-standard text. In Proceedings of the 6th Workshop on Balto-Slavic natural language processing (pp. 60\u201368).","DOI":"10.18653\/v1\/W17-1410"},{"key":"9425_CR52","unstructured":"Ljube\u0161i\u0107, N., Erjavec, T., & Fi\u0161er, D. (2017). Twitter corpus Janes-Tweet 1.0. Slovenian language resource repository CLARIN.SI. http:\/\/hdl.handle.net\/11356\/1142 ."},{"key":"9425_CR53","unstructured":"Ljube\u0161i\u0107, N., Erjavec, T., & Fi\u0161er, D. (2017). Wikipedia talk corpus Janes-Wiki 1.0. Slovenian language resource repository CLARIN.SI. http:\/\/hdl.handle.net\/11356\/1137 ."},{"key":"9425_CR54","unstructured":"Ljube\u0161i\u0107, N., & Fi\u0161er, D. (2016a). Private or corporate? Predicting user types on twitter. In Proceedings of the 2nd workshop on noisy user-generated text (WNUT) (pp. 4\u201312)."},{"key":"9425_CR55","unstructured":"Ljube\u0161i\u0107, N., & Fi\u0161er, D. (2016b). Slovene Twitter analytics. In Proceedings of the 4th conference on CMC and social media corpora for the humanities."},{"key":"9425_CR56","unstructured":"Ljube\u0161i\u0107, N., Fi\u0161er, D., & Erjavec, T. (2014). TweetCaT: A tool for building Twitter corpora of smaller languages. In Proceedings of the ninth international conference on language resources and evaluation (LREC\u201914). Reykjavik: European Language Resources Association (ELRA)."},{"key":"9425_CR57","unstructured":"Ljube\u0161i\u0107, N., Fi\u0161er, D., Erjavec, T., \u010cibej, J., Marko, D., Pollak, S., & \u0160krjanec, I. (2015). Predicting the level of text standardness in user-generated content. In Proceedings of recent advances in natural language processing."},{"key":"9425_CR58","unstructured":"Ljube\u0161i\u0107, N., Zupan, K., Fi\u0161er, D., & Erjavec, T. (2016). Normalising Slovene data: Historical texts vs. user-generated content. In Proceedings of KONVENS."},{"key":"9425_CR59","unstructured":"Logar, N., Gr\u010dar, M., Brakus, M., Erjavec, T., Arhar\u00a0Holdt, \u0160., Krek, S., & Kosem, I. (2012). Korpusi slovenskega jezika Gigafida, KRES, ccGigafida in ccKRES: gradnja, vsebina, uporaba. Trojina, zavod za uporabno slovenistiko."},{"key":"9425_CR60","unstructured":"Logar Berginc, N., Gr\u010dar, M., Brakus, M., Erjavec, T., Arhar Holdt, \u0160., & Krek, S. (2012). Korpusi slovenskega jezika Gigafida, KRES, ccGigafida in ccKRES: gradnja, vsebina, uporaba [The Gigafida, KRES, ccGigafida and ccKRES corpora of Slovene language: compilation, content, use]. Zbirka Sporazumevanje. Trojina, zavod za uporabno slovenistiko: Fakulteta za dru\u017ebene vede, Ljubljana."},{"issue":"2","key":"9425_CR61","doi-asserted-by":"crossref","first-page":"59","DOI":"10.21248\/jlcl.29.2014.189","volume":"29","author":"E Margaretha","year":"2014","unstructured":"Margaretha, E., & L\u00fcngen, H. (2014). Building linguistic corpora from wikipedia articles and discussions. JLCL, 29(2), 59\u201382.","journal-title":"JLCL"},{"key":"9425_CR62","doi-asserted-by":"crossref","unstructured":"Metzler, D., Dumais, S., & Meek, C. (2007). Similarity measures for short segments of text. In European conference on information retrieval (pp. 16\u201327). Springer.","DOI":"10.1007\/978-3-540-71496-5_5"},{"issue":"5","key":"9425_CR63","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1371\/journal.pone.0155036","volume":"11","author":"I Mozeti\u010d","year":"2016","unstructured":"Mozeti\u010d, I., Gr\u010dar, M., & Smailovi\u0107, J. (2016). Multilingual twitter sentiment classification: The role of human annotators. PLoS ONE, 11(5), 1\u201326. https:\/\/doi.org\/10.1371\/journal.pone.0155036 .","journal-title":"PLoS ONE"},{"key":"9425_CR64","unstructured":"Popi\u010d, D., Zupan, K., Logar, P., Kav\u010di\u010d, T., Erjavec, T., & Fi\u0161er, D. (2017). Tweet comma corpus Janes-Vejica 1.0. Slovenian language resource repository CLARIN.SI. http:\/\/hdl.handle.net\/11356\/1088 ."},{"key":"9425_CR65","unstructured":"Popi\u010d, D., & Fi\u0161er, D. (2018). (ne)Normativnost ra\u010dunalni\u0161ko posredovane komunikacije v sloven\u0161\u010dini: merilo vejice. Viri, orodja in metode za analizo spletne sloven\u0161\u010dine (pp. 140\u2013159)."},{"key":"9425_CR66","volume-title":"Natural language annotation for machine learning","author":"J Pustejovsky","year":"2012","unstructured":"Pustejovsky, J., & Stubbs, A. (2012). Natural language annotation for machine learning. Sebastopol: O\u2019Reilly Media."},{"key":"9425_CR67","unstructured":"Reher, \u0160., & Fi\u0161er, D. (2018). Kodno preklapljanje v objavah slovenskih uporabnikov twitterja. Viri, orodja in metode za analizo spletne sloven\u0161\u010dine (pp. 294\u2013323)."},{"key":"9425_CR68","unstructured":"Reher, \u0160., Toma\u017e, & Fi\u0161er, D. (2017). Tweet code-switching corpus Janes-Preklop 1.0. Slovenian language resource repository CLARIN.SI. http:\/\/hdl.handle.net\/11356\/1154 ."},{"key":"9425_CR69","unstructured":"Rei, L., Krek, S., & Mladeni\u0107, D. (2016). xLiMe Twitter corpus XTC 1.0.1. Slovenian language resource repository CLARIN.SI. http:\/\/hdl.handle.net\/11356\/1078 ."},{"key":"9425_CR70","unstructured":"Rychl\u00fd, P. (2007). Manatee\/Bonito: A modular corpus manager. In: 1st workshop on recent advances in Slavonic natural language processing (pp. 65\u201370). Brno: Masarykova univerzita."},{"key":"9425_CR71","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/j.ins.2014.04.034","volume":"285","author":"J Smailovi\u0107","year":"2014","unstructured":"Smailovi\u0107, J., Gr\u010dar, M., Lavra\u010d, N., & \u017dnidar\u0161i\u010d, M. (2014). Stream-based active learning for sentiment analysis in the financial domain. Information Sciences, 285, 181\u2013203.","journal-title":"Information Sciences"},{"key":"9425_CR72","unstructured":"TEI\u00a0Consortium. (2017). TEI P5: Guidelines for electronic text encoding and interchange. TEI Consortium. http:\/\/www.tei-c.org\/Guidelines\/P5\/ ."},{"key":"9425_CR73","unstructured":"Tjong Kim\u00a0Sang, E., Bollmann, M., Boschker, R., Casacuberta, F., Dietz, F., Dipper, S., Domingo, M., van\u00a0der Goot, R., van Koppen, M., Ljube\u0161i\u0107, N., \u00d6stling, R., Petran, F., Pettersson, E., Scherrer, Y., Schraagen, M., Sevens, L., Tiedemann, J., Vanallemeersch, T., & Zervanou, K. (2017). The clin27 shared task: Translating historical text to contemporary language for improving automatic linguistic annotation. Computational Linguistics in the Netherlands Journal, 7, 53\u201364. http:\/\/www.clinjournal.org\/sites\/clinjournal.org\/files\/04.clin27-shared-task.pdf ."},{"key":"9425_CR74","unstructured":"\u010cibej, J., \u0160pela Arhar Holdt, Erjavec, T., & Fi\u0161er, D. (2016). Razvoj u\u010dne mno\u017eice za izbolj\u0161ano ozna\u010devanje spletnih besedil [The developoment of a training dataset for better annotation of web texts]. In Proceedings of the conference on language technologies and digital humanities (pp. 40\u201346). Ljubljana: Academic Publishing Division of the Faculty of Arts."},{"issue":"4","key":"9425_CR75","doi-asserted-by":"publisher","first-page":"1031","DOI":"10.1007\/s10579-013-9216-5","volume":"47","author":"D Verdonik","year":"2013","unstructured":"Verdonik, D., Kosem, I., Vitez, A. Z., Krek, S., & Stabej, M. (2013). Compilation, transcription and usage of a reference speech corpus: The case of the Slovene corpus GOS. Language Resources and Evaluation, 47(4), 1031\u20131048.","journal-title":"Language Resources and Evaluation"},{"key":"9425_CR76","doi-asserted-by":"crossref","unstructured":"Verhoeven, B., \u0160krjanec, I., & Pollak, S. (2017). Gender profiling for Slovene Twitter communication: The influence of gender marking, content and style. In The 6th workshop on Balto-Slavic natural language processing, BSNLP 2017 (p. 119).","DOI":"10.18653\/v1\/W17-1418"},{"key":"9425_CR77","unstructured":"Vickery, G., & Wunsch-Vincent, S. (2007). Participative web and user-created content: Web 2.0 wikis and social networking. Paris: Organization for Economic Cooperation and Development (OECD)"},{"issue":"1","key":"9425_CR78","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1111\/j.1083-6101.2012.01592.x","volume":"18","author":"JB Walther","year":"2012","unstructured":"Walther, J. B., & Jang, Jw. (2012). Communication processes in participatory websites. Journal of Computer-Mediated Communication, 18(1), 2\u201315.","journal-title":"Journal of Computer-Mediated Communication"},{"key":"9425_CR79","unstructured":"Yimam, S. M., Gurevych, I., de\u00a0Castilho, R. E., & Biemann, C. (2013). Webanno: A flexible,web-based and visually supported system for distributed annotations. In Proceedings of the 51st annual meeting of the association for computational linguistics (system demonstrations) (ACL 2013) (pp. 1\u20136). Association for Computational Linguistics, Stroudsburg, PA, USA."},{"key":"9425_CR80","unstructured":"Zampieri, M., Malmasi, S., Nakov, P., Ali, A., Shuon, S., Glass, J., Scherrer, Y., Samard\u017ei\u0107, T., Ljube\u0161i\u0107, N., Tiedemann, J., van der Lee, C., Grondelaers, S., Oostdijk, N., van den Bosch, A., Kumar, R., Lahiri, B., & Jain, M. (2018). Language identification and morphosyntactic tagging: The second VarDial evaluation campaign. In Proceedings of the fifth workshop on nlp for similar languages, varieties and dialects (VarDial). Santa Fe, USA."}],"container-title":["Language Resources and Evaluation"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-018-9425-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10579-018-9425-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-018-9425-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,5]],"date-time":"2023-09-05T11:36:47Z","timestamp":1693913807000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10579-018-9425-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,9,26]]},"references-count":80,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2020,3]]}},"alternative-id":["9425"],"URL":"https:\/\/doi.org\/10.1007\/s10579-018-9425-z","relation":{},"ISSN":["1574-020X","1574-0218"],"issn-type":[{"value":"1574-020X","type":"print"},{"value":"1574-0218","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,9,26]]},"assertion":[{"value":"26 September 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}