{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,8,6]],"date-time":"2024-08-06T12:34:13Z","timestamp":1722947653795},"reference-count":55,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Expert Systems with Applications"],"published-print":{"date-parts":[[2022,5]]},"DOI":"10.1016\/j.eswa.2021.116375","type":"journal-article","created":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T22:15:26Z","timestamp":1641075326000},"page":"116375","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":5,"special_numbering":"C","title":["VSTAR: Visual Semantic Thumbnails and tAgs Revitalization"],"prefix":"10.1016","volume":"193","author":[{"ORCID":"http:\/\/orcid.org\/0000-0001-9481-511X","authenticated-orcid":false,"given":"Salvatore","family":"Carta","sequence":"first","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0001-5576-7311","authenticated-orcid":false,"given":"Alessandro","family":"Giuliani","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0003-1047-5491","authenticated-orcid":false,"given":"Leonardo","family":"Piano","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-7862-8362","authenticated-orcid":false,"given":"Alessandro Sebastian","family":"Podda","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0001-8646-6183","authenticated-orcid":false,"given":"Diego","family":"Reforgiato Recupero","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"6","key":"10.1016\/j.eswa.2021.116375_b1","doi-asserted-by":"crossref","first-page":"1129","DOI":"10.1016\/j.ipm.2018.08.001","article-title":"Semantic text classification: A survey of past and recent advances","volume":"54","author":"Altinel","year":"2018","journal-title":"Information Processing & Management"},{"key":"10.1016\/j.eswa.2021.116375_b2","series-title":"Proceedings of the SIGCHI Conference on Human Factors in Computing Systems","first-page":"971","article-title":"Why we tag: motivations for annotation in mobile and online media","author":"Ames","year":"2007"},{"key":"10.1016\/j.eswa.2021.116375_b3","doi-asserted-by":"crossref","first-page":"346","DOI":"10.1016\/j.knosys.2018.12.005","article-title":"A semantic similarity-based perspective of affect lexicons for sentiment analysis","volume":"165","author":"Araque","year":"2019","journal-title":"Knowledge-Based Systems"},{"key":"10.1016\/j.eswa.2021.116375_b4","series-title":"Proceedings of the 16th International Conference on Web Information Systems and Technologies","first-page":"183","article-title":"Automated tag enrichment by semantically related trends","author":"Arca","year":"2020"},{"key":"10.1016\/j.eswa.2021.116375_b5","doi-asserted-by":"crossref","unstructured":"Bajaj, P., Kavidayal, M., Srivastava, P., Akhtar, M., & Kumaraguru, P. (2016). Disinformation in multimedia annotation: Misleading metadata detection on YouTube. In Iv and L-MM 2016 - Proceedings of the 2016 ACM workshop on vision and language integration meets multimedia fusion, co-located with ACM multimedia 2016 (pp. 53\u201361), cited By 3.","DOI":"10.1145\/2983563.2983569"},{"issue":"4","key":"10.1016\/j.eswa.2021.116375_b6","doi-asserted-by":"crossref","first-page":"1443","DOI":"10.1007\/s11042-014-1976-4","article-title":"Data-driven approaches for social image and video tagging","volume":"74","author":"Ballan","year":"2015","journal-title":"Multimedia Tools and Applications"},{"key":"10.1016\/j.eswa.2021.116375_b7","series-title":"Proceedings of the 16th international conference on web information systems and technologies","first-page":"209","article-title":"Efficient thumbnail identification through object recognition","author":"Carta","year":"2020"},{"key":"10.1016\/j.eswa.2021.116375_b8","doi-asserted-by":"crossref","unstructured":"Chen, Y., Wang, S., Zhang, W., & Huang, Q. (2018). Less is more: Picking informative frames for video captioning. In Lecture notes in computer science (Including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics), 11217 LNCS (pp. 367\u2013384).","DOI":"10.1007\/978-3-030-01261-8_22"},{"issue":"7","key":"10.1016\/j.eswa.2021.116375_b9","doi-asserted-by":"crossref","first-page":"999","DOI":"10.1109\/TCSVT.2012.2189478","article-title":"Tagging webcast text in baseball videos by video segmentation and text alignment","volume":"22","author":"Chiu","year":"2012","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"issue":"1","key":"10.1016\/j.eswa.2021.116375_b10","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1177\/001316446002000104","article-title":"A coefficient of agreement for nominal scales","volume":"20","author":"Cohen","year":"1960","journal-title":"Educational and Psychological Measurement"},{"key":"10.1016\/j.eswa.2021.116375_b11","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.bdr.2016.10.001","article-title":"Producing linked data for smart cities: The case of catania","volume":"7","author":"Consoli","year":"2017","journal-title":"Big Data Research"},{"key":"10.1016\/j.eswa.2021.116375_b12","series-title":"Trends in Applied Knowledge-Based Systems and Data Science","first-page":"157","article-title":"A multimodal approach to relevance and pertinence of documents","author":"Cristani","year":"2016"},{"key":"10.1016\/j.eswa.2021.116375_b13","series-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019"},{"key":"10.1016\/j.eswa.2021.116375_b14","doi-asserted-by":"crossref","unstructured":"Fernandez, D., Varas, D., Espadaler, J., Masuda, I., Ferreira, J., Woodward, A., Rodriguez, D., Giro-I-Nieto, X., Riveiro, J., & Bou, E. (2017). ViTS: Video tagging system from massive web multimedia collections. In Proceedings - 2017 IEEE international conference on computer vision workshops, Vol. 2018-January, (pp. 337\u2013346), cited By 6.","DOI":"10.1109\/ICCVW.2017.48"},{"key":"10.1016\/j.eswa.2021.116375_b15","unstructured":"Frome, A., Corrado, G. S., Shlens, J., Bengio, S., Dean, J., Ranzato, M., & Mikolov, T. (2013). DeViSE: A deep visual-semantic embedding model. In C. J. C. Burges and L. Bottou and Z. Ghahramani and K. Q. Weinberger (Ed.), NIPS (pp. 2121\u20132129)."},{"key":"10.1016\/j.eswa.2021.116375_b16","doi-asserted-by":"crossref","unstructured":"Gu, H., & Swaminathan, V. (2018). From thumbnails to summaries-A single deep neural network to rule them all. In 2018 IEEE international conference on multimedia and Expo (pp. 1\u20136).","DOI":"10.1109\/ICME.2018.8486533"},{"issue":"7","key":"10.1016\/j.eswa.2021.116375_b17","doi-asserted-by":"crossref","first-page":"1426","DOI":"10.1109\/TKDE.2017.2682858","article-title":"Engagement and popularity dynamics of YouTube videos and sensitivity to meta-data","volume":"29","author":"Hoiles","year":"2017","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"10.1016\/j.eswa.2021.116375_b18","series-title":"ACM Multimedia","first-page":"243","article-title":"Clickage: towards bridging semantic and intent gaps via mining click logs of search engines","author":"Hua","year":"2013"},{"issue":"19","key":"10.1016\/j.eswa.2021.116375_b19","doi-asserted-by":"crossref","first-page":"20341","DOI":"10.1007\/s11042-017-4781-z","article-title":"Tag refinement of micro-videos by learning from multiple data sources","volume":"76","author":"Huang","year":"2017","journal-title":"Multimedia Tools and Applications"},{"key":"10.1016\/j.eswa.2021.116375_b20","doi-asserted-by":"crossref","unstructured":"Ilyas, S., & Ur Rehman, H. (2019). A deep learning based approach for precise video tagging. In 2019 15th international conference on emerging technologies (pp. 1\u20136).","DOI":"10.1109\/ICET48972.2019.8994567"},{"issue":"4","key":"10.1016\/j.eswa.2021.116375_b21","doi-asserted-by":"crossref","first-page":"422","DOI":"10.1145\/582415.582418","article-title":"Cumulated gain-based evaluation of IR techniques","volume":"20","author":"J\u00e4rvelin","year":"2002","journal-title":"ACM Transactions on Information Systems"},{"key":"10.1016\/j.eswa.2021.116375_b22","doi-asserted-by":"crossref","first-page":"6071","DOI":"10.1109\/ACCESS.2019.2963535","article-title":"Movie tags prediction and segmentation using deep learning","volume":"8","author":"Khan","year":"2020","journal-title":"IEEE Access"},{"issue":"2","key":"10.1016\/j.eswa.2021.116375_b23","doi-asserted-by":"crossref","first-page":"167","DOI":"10.1111\/cgf.12550","article-title":"Comprehensible video thumbnails","volume":"34","author":"Kim","year":"2015","journal-title":"Computer Graphics Forum"},{"key":"10.1016\/j.eswa.2021.116375_b24","doi-asserted-by":"crossref","first-page":"545","DOI":"10.1007\/s10844-019-00574-9","article-title":"Unsupervised tag recommendation for popular and cold products","volume":"54","author":"Konjengbam","year":"2019","journal-title":"Journal of Intelligent Information Systems"},{"key":"10.1016\/j.eswa.2021.116375_b25","series-title":"Proceedings of the Workshop on Vision and Natural Language Processing","first-page":"10","article-title":"Generating natural-language video descriptions using text-mined knowledge","author":"Krishnamoorthy","year":"2013"},{"key":"10.1016\/j.eswa.2021.116375_b26","unstructured":"Lee, Y. J., Ghosh, J., & Grauman, K. (2012). Discovering important people and objects for egocentric video summarization. In 2012 IEEE conference on computer vision and pattern recognition (pp. 1346\u20131353)."},{"issue":"2","key":"10.1016\/j.eswa.2021.116375_b27","doi-asserted-by":"crossref","first-page":"167","DOI":"10.3233\/SW-140134","article-title":"Dbpedia - a large-scale, multilingual knowledge base extracted from wikipedia","volume":"6","author":"Lehmann","year":"2015","journal-title":"Semantic Web Journal"},{"key":"10.1016\/j.eswa.2021.116375_b28","doi-asserted-by":"crossref","unstructured":"Lipko, D. V., Ilyasov, T. K., & Arjakov, A. V. (2021). Automatic generation of preview images based on video sequence analysis using computer vision. In 2021 IEEE conference of russian young researchers in electrical and electronic engineering (pp. 2154\u20132157).","DOI":"10.1109\/ElConRus51938.2021.9396099"},{"key":"10.1016\/j.eswa.2021.116375_b29","doi-asserted-by":"crossref","unstructured":"Liu, C., Huang, Q., & Jiang, S. (2011). Query sensitive dynamic web video thumbnail generation. In 2011 18th IEEE international conference on image processing (pp. 2449\u20132452).","DOI":"10.1109\/ICIP.2011.6116155"},{"key":"10.1016\/j.eswa.2021.116375_b30","series-title":"CVPR","first-page":"3707","article-title":"Multi-task deep visual-semantic embedding for video thumbnail selection.","author":"Liu","year":"2015"},{"issue":"4","key":"10.1016\/j.eswa.2021.116375_b31","doi-asserted-by":"crossref","first-page":"465","DOI":"10.1080\/17512786.2019.1628657","article-title":"Making video news visible: Identifying the optimization strategies of the cybermedia on YouTube using web metrics","volume":"14","author":"Lopezosa","year":"2020","journal-title":"Journalism Practice"},{"key":"10.1016\/j.eswa.2021.116375_b32","doi-asserted-by":"crossref","first-page":"276","DOI":"10.11613\/BM.2012.031","article-title":"Interrater reliability: the kappa statistic","volume":"22","author":"McHugh","year":"2012","journal-title":"Biochemia Medica"},{"issue":"11","key":"10.1016\/j.eswa.2021.116375_b33","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1145\/219717.219748","article-title":"Wordnet: A lexical database for english","volume":"38","author":"Miller","year":"1995","journal-title":"Communications of the ACM"},{"key":"10.1016\/j.eswa.2021.116375_b34","doi-asserted-by":"crossref","unstructured":"Pan, Y., Yao, T., Mei, T., Li, H., Ngo, C.-W., & Rui, Y. (2014). Click-through-based cross-view learning for image search. In ACM SIGIR conference on research and development in information retrieval.","DOI":"10.1145\/2600428.2609568"},{"key":"10.1016\/j.eswa.2021.116375_b35","doi-asserted-by":"crossref","unstructured":"Patwardhan, A. A., Das, S., Varshney, S., Desarkar, M. S., & Dogra, D. P. (2019). ViTag: Automatic video tagging using segmentation and conceptual inference. In 2019 IEEE fifth international conference on multimedia big data (pp. 271\u2013276).","DOI":"10.1109\/BigMM.2019.00-12"},{"key":"10.1016\/j.eswa.2021.116375_b36","doi-asserted-by":"crossref","unstructured":"Presutti, V., Consoli, S., Nuzzolese, A., Recupero, D., Gangemi, A., Bannour, I., & Zargayouna, H. (2014). Uncovering the semantics of wikipedia pagelinks. In Lecture Notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics), Vol. 8876 (pp. 413\u2013428), cited By 19.","DOI":"10.1007\/978-3-319-13704-9_32"},{"key":"10.1016\/j.eswa.2021.116375_b37","series-title":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing","article-title":"Sentence-BERT: Sentence embeddings using siamese BERT-networks","author":"Reimers","year":"2019"},{"key":"10.1016\/j.eswa.2021.116375_b38","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1016\/j.jbusres.2019.04.018","article-title":"Machine learning approach to auto-tagging online content for content marketing efficiency: A comparative analysis between methods and content type","volume":"101","author":"Salminen","year":"2019","journal-title":"Journal of Business Research"},{"key":"10.1016\/j.eswa.2021.116375_b39","series-title":"Proceedings of the 25th ACM Conference on Hypertext and Social Media","first-page":"273","article-title":"On the choice of data sources to improve content discoverability via textual feature optimization","author":"Santos-Neto","year":"2014"},{"key":"10.1016\/j.eswa.2021.116375_b40","series-title":"Proceedings of the 1st International Workshop on Social Multimedia and Storytelling Co-Located with ACM International Conference on Multimedia Retrieval","article-title":"Towards boosting video popularity via tag selection","volume":"vol. 1198","author":"Santos-Neto","year":"2014"},{"issue":"1","key":"10.1016\/j.eswa.2021.116375_b41","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1007\/s00530-014-0399-4","article-title":"Accurate online video tagging via probabilistic hybrid modeling","volume":"22","author":"Shen","year":"2016","journal-title":"Multimedia Systems"},{"issue":"2","key":"10.1016\/j.eswa.2021.116375_b42","doi-asserted-by":"crossref","first-page":"34","DOI":"10.1109\/MMUL.2018.112135923","article-title":"Image and video captioning with augmented neural architectures","volume":"25","author":"Shetty","year":"2018","journal-title":"IEEE MultiMedia"},{"issue":"4","key":"10.1016\/j.eswa.2021.116375_b43","doi-asserted-by":"crossref","first-page":"411","DOI":"10.1016\/j.cviu.2009.03.011","article-title":"Video shot boundary detection: Seven years of trecvid activity","volume":"114","author":"Smeaton","year":"2010","journal-title":"Computer Vision and Image Understanding"},{"key":"10.1016\/j.eswa.2021.116375_b44","series-title":"CIKM","first-page":"659","article-title":"To click or not to click: Automatic selection of beautiful thumbnails from videos.","author":"Song","year":"2016"},{"issue":"7","key":"10.1016\/j.eswa.2021.116375_b45","doi-asserted-by":"crossref","first-page":"3303","DOI":"10.1109\/TIP.2017.2666039","article-title":"Semantic highlight retrieval and term prediction","volume":"26","author":"Sun","year":"2017","journal-title":"IEEE Transactions on Image Processing"},{"issue":"6","key":"10.1016\/j.eswa.2021.116375_b46","doi-asserted-by":"crossref","first-page":"1689","DOI":"10.1108\/INTR-10-2019-0406","article-title":"Youtube marketing: how marketers\u2019 video optimization practices influence video views","volume":"30","author":"Tafesse","year":"2020","journal-title":"Internet Research"},{"key":"10.1016\/j.eswa.2021.116375_b47","doi-asserted-by":"crossref","unstructured":"Toderici, G., Aradhye, H., Pas\u00e7a, M., Sbaiz, L., & Yagnik, J. (2010). Finding meaning on YouTube: Tag recommendation and category discovery. In 2010 IEEE computer society conference on computer vision and pattern recognition (pp. 3447\u20133454).","DOI":"10.1109\/CVPR.2010.5539985"},{"issue":"3","key":"10.1016\/j.eswa.2021.116375_b48","doi-asserted-by":"crossref","DOI":"10.1145\/2516633.2516635","article-title":"Improving contextual advertising by adopting collaborative filtering","volume":"7","author":"Vargiu","year":"2013","journal-title":"ACM Transactions on the Web"},{"key":"10.1016\/j.eswa.2021.116375_b49","first-page":"582","article-title":"Query-adaptive video summarization via quality-aware relevance estimation","author":"Vasudevan","year":"2017"},{"key":"10.1016\/j.eswa.2021.116375_b50","series-title":"Proceedings of the 18th International Conference on World Wide Web","first-page":"361","article-title":"Learning to tag","author":"Wu","year":"2009"},{"key":"10.1016\/j.eswa.2021.116375_b51","series-title":"Proceedings of the 32nd International Conference on International Conference on Machine Learning - Vol. 37","first-page":"2048","article-title":"Show, attend and tell: Neural image caption generation with visual attention","author":"Xu","year":"2015"},{"key":"10.1016\/j.eswa.2021.116375_b52","first-page":"2346","article-title":"Jointly modeling deep video and compositional text to bridge vision and language in a unified framework","author":"Xu","year":"2015"},{"key":"10.1016\/j.eswa.2021.116375_b53","doi-asserted-by":"crossref","unstructured":"Yang, W., & Toderici, G. (2011). Discriminative tag learning on YouTube videos with latent sub-tags. In CVPR 2011 (pp. 3217\u20133224).","DOI":"10.1109\/CVPR.2011.5995402"},{"key":"10.1016\/j.eswa.2021.116375_b54","first-page":"2332","article-title":"Sentence specified dynamic video thumbnail generation","author":"Yuan","year":"2019"},{"key":"10.1016\/j.eswa.2021.116375_b55","doi-asserted-by":"crossref","unstructured":"Zeng, Z., Xue, C., Gao, N., Wang, L., & Liu, Z. (2018). Learning from audience intelligence: Dynamic labeled LDA model for time-sync commented video tagging. In CONIP (pp. 546\u2013559).","DOI":"10.1007\/978-3-030-04182-3_48"}],"container-title":["Expert Systems with Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417421016675?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417421016675?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,2,20]],"date-time":"2024-02-20T19:57:48Z","timestamp":1708459068000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0957417421016675"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5]]},"references-count":55,"alternative-id":["S0957417421016675"],"URL":"https:\/\/doi.org\/10.1016\/j.eswa.2021.116375","relation":{},"ISSN":["0957-4174"],"issn-type":[{"value":"0957-4174","type":"print"}],"subject":[],"published":{"date-parts":[[2022,5]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"VSTAR: Visual Semantic Thumbnails and tAgs Revitalization","name":"articletitle","label":"Article Title"},{"value":"Expert Systems with Applications","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.eswa.2021.116375","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2021 Elsevier Ltd. All rights reserved.","name":"copyright","label":"Copyright"}],"article-number":"116375"}}