{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T08:52:28Z","timestamp":1725439948045},"reference-count":43,"publisher":"Elsevier BV","issue":"3","license":[{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2021,10,12]],"date-time":"2021-10-12T00:00:00Z","timestamp":1633996800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100010418","name":"Institute for Information and Communications Technology Promotion","doi-asserted-by":"publisher","award":["2019-0-00026"],"id":[{"id":"10.13039\/501100010418","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100014188","name":"Ministry of Science and ICT, South Korea","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100014188","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003621","name":"Ministry of Science, ICT and Future Planning","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003621","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["ICT Express"],"published-print":{"date-parts":[[2022,9]]},"DOI":"10.1016\/j.icte.2021.10.001","type":"journal-article","created":{"date-parts":[[2021,10,25]],"date-time":"2021-10-25T19:50:11Z","timestamp":1635191411000},"page":"444-462","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":4,"title":["Android malware dataset construction methodology to minimize bias\u2013variance\u200b tradeoff"],"prefix":"10.1016","volume":"8","author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-0715-9685","authenticated-orcid":false,"given":"Shinho","family":"Lee","sequence":"first","affiliation":[]},{"given":"Wookhyun","family":"Jung","sequence":"additional","affiliation":[]},{"given":"Wonrak","family":"Lee","sequence":"additional","affiliation":[]},{"given":"Hyung Geun","family":"Oh","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-8707-7216","authenticated-orcid":false,"given":"Eui Tak","family":"Kim","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"5","key":"10.1016\/j.icte.2021.10.001_b1","doi-asserted-by":"crossref","first-page":"185","DOI":"10.3390\/info12050185","article-title":"A comprehensive survey on machine learning techniques for android malware detection","volume":"12","author":"Kouliaridis","year":"2021","journal-title":"Information"},{"key":"10.1016\/j.icte.2021.10.001_b2","unstructured":"F. Pendlebury, F. Pierazzi, R. Jordaney, J. Kinder, L. Cavallaro, TESSERACT: Eliminating experimental bias in malware classification across space and time, in: Proceedings of the 28th USENIX Security Symposium, 14-16 2019, Santa Clara, CA, USA, pp. 729\u2013746."},{"key":"10.1016\/j.icte.2021.10.001_b3","doi-asserted-by":"crossref","unstructured":"K. Allix, T.F. Bissyand\u00e9, J. Klein, Y.L. Traon, Are your training datasets yet relevant?, in: Proceedings of the 7th International Symposium on Engineering Secure Software and Systems, ESSoS, 4-6 2015, Milan, Italy, pp. 51\u201367.","DOI":"10.1007\/978-3-319-15618-7_5"},{"key":"10.1016\/j.icte.2021.10.001_b4","unstructured":"R. Jordaney, K. Sharad, S.K. Dash, Z. Wang, D. Papini, Transcend: Detecting concept drift in malware classification models, in: Proceedings of the 26th USENIX Security Symposium, 16-18 2017, Vancouver, BC, Canada, pp. 625\u2013642."},{"key":"10.1016\/j.icte.2021.10.001_b5","article-title":"A survey of malware detection in android apps: Recommendations and perspectives for future research","volume":"39","author":"Razgallah","year":"2021","journal-title":"Comp. Sci. Rev."},{"key":"10.1016\/j.icte.2021.10.001_b6","doi-asserted-by":"crossref","unstructured":"Y. Zhou, X. Jiang, Dissecting android malware: Characterization and evolution, in: Proceedings of the 2012 IEEE Symposium on Security and Privacy, 20-23 2012, San Francisco, CA, USA, pp. 95\u2013109.","DOI":"10.1109\/SP.2012.16"},{"key":"10.1016\/j.icte.2021.10.001_b7","doi-asserted-by":"crossref","unstructured":"K. Allix, T.F. Bissyand\u00e9, J. Klein, Y. Le\u00a0Traon, AndroZoo: Collecting millions of android apps for the research community, in: Proceedings of the 2016 IEEE\/ACM 13th Working Conference on Mining Software Repositories, MSR, 14-15 2016, Austin, TX, USA, pp. 468\u2013471.","DOI":"10.1145\/2901739.2903508"},{"key":"10.1016\/j.icte.2021.10.001_b8","doi-asserted-by":"crossref","unstructured":"F. Wei, Y. Li, S. Roy, X. Ou, W. Zhou, Deep ground truth analysis of current android malware, in: Proceedings of the 14th International Conference on Detection of Intrusions and Malware, and Vulnerability Assessment, DIMVA, 6-7 2017, Bonn, Germany, pp. 252\u2013276.","DOI":"10.1007\/978-3-319-60876-1_12"},{"key":"10.1016\/j.icte.2021.10.001_b9","doi-asserted-by":"crossref","first-page":"128","DOI":"10.1016\/j.inffus.2018.12.006","article-title":"Android malware detection through hybrid features fusion and ensemble classifiers: The AndroPyTool framework and the OmniDroid dataset","volume":"52","author":"Mart\u00edn","year":"2019","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.icte.2021.10.001_b10","doi-asserted-by":"crossref","unstructured":"S. Lee, W. Jung, S. Lee, E.T. Kim, Malware response naming scheme for security control service, in: Proceedings of the 2020 International Conference on Information and Communication Technology Convergence, ICTC, 21-23 2020, Jeju, Korea, pp. 1549\u20131552.","DOI":"10.1109\/ICTC49870.2020.9289450"},{"key":"10.1016\/j.icte.2021.10.001_b11","doi-asserted-by":"crossref","unstructured":"M. Bailey, J. Oberheide, J. Andersen, Z.M. Mao, F. Jahanian, J. Nazario, Automated classification and analysis of internet malware, in: Proceedings of the 10th International Workshop on Recent Advances in Intrusion Detection, RAID, 5-7 2007, Gold Goast, Australia, pp. 178\u2013197.","DOI":"10.1007\/978-3-540-74320-0_10"},{"key":"10.1016\/j.icte.2021.10.001_b12","doi-asserted-by":"crossref","unstructured":"V.S. Sheng, F. Provost, P.G. Ipeirotis, Get another label? Improving data quality and data mining using multiple, noisy labelers, in: Proceedings of the 14th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD, 24-27 2008, Las Vegas, Nevada, USA, pp. 614\u2013622.","DOI":"10.1145\/1401890.1401965"},{"key":"10.1016\/j.icte.2021.10.001_b13","doi-asserted-by":"crossref","unstructured":"F. Maggi, A. Bellini, G. Salvaneschi, S. Zanero, Finding non-trivial malware naming inconsistencies, in: Proceedings of the 7th International Conference on Information Systems Security, ICISS, 15-19 2011, Kolkata, India, pp. 144\u2013159.","DOI":"10.1007\/978-3-642-25560-1_10"},{"key":"10.1016\/j.icte.2021.10.001_b14","doi-asserted-by":"crossref","unstructured":"A. Mohaisen, O. Alrawi, AV-Meter: An evaluation of antivirus scans and labels, in: Proceedings of the 11th International Conference on Detection of Intrusions and Malware, and Vulnerability Assessment, DIMVA, 10-11 2014, Egham, UK, pp. 112\u2013131.","DOI":"10.1007\/978-3-319-08509-8_7"},{"key":"10.1016\/j.icte.2021.10.001_b15","doi-asserted-by":"crossref","unstructured":"B. Miller, A. Kantchelian, M.C. Tschantz, S. Afroz, R. Bachwani, R. Faizullabhoy, L. Huang, V. Shankar, T. Wu, G. Yiu, A.D. Joseph, J.D. Tygar, Reviewer integration and performance measurement for malware detection, in: Proceedings of the 13th International Conference on Detection of Intrusions and Malware, and Vulnerability Assessment, DIMVA, 7-8 2016, San Sebasti\u00e1n, Spain, pp. 122\u2013141.","DOI":"10.1007\/978-3-319-40667-1_7"},{"key":"10.1016\/j.icte.2021.10.001_b16","doi-asserted-by":"crossref","unstructured":"R. Perdisci, . M.\u00a0U, VAMO: Towards a fully automated malware clustering validity analysis, in: Proceedings of the 28th Annual Computer Security Applications Conference, ACSAC, 3-7 2012, Orlando, Florida, USA, pp. 329\u2013338.","DOI":"10.1145\/2420950.2420999"},{"key":"10.1016\/j.icte.2021.10.001_b17","doi-asserted-by":"crossref","unstructured":"A. Kantchelian, M.C. Tschantz, S. Afroz, B. Miller, V. Shankar, R. Bachwani, A.D. Joseph, J.D. Tygar, Better malware ground truth: Techniques for weighting anti-virus vendor labels, in: Proceedings of the 8th ACM Workshop on Artificial Intelligence and Security, AISec, 16 2015, Denver, Colorado, USA, pp. 45\u201356.","DOI":"10.1145\/2808769.2808780"},{"key":"10.1016\/j.icte.2021.10.001_b18","doi-asserted-by":"crossref","unstructured":"M. Sebasti\u00e1n, R. Rivera, P. Kotzias, J. Caballero, AVclass: A tool for massive malware labeling, in: Proceedings of the 19th International Symposium on Research in Attacks, Intrusions, and Defenses, RAID, 19-21 2016, Paris, France, pp. 230\u2013253.","DOI":"10.1007\/978-3-319-45719-2_11"},{"key":"10.1016\/j.icte.2021.10.001_b19","doi-asserted-by":"crossref","unstructured":"M. Hurier, G. Suarez-Tangil, S.K. Dash, T.F. Bissyand\u00e9, Y. Le\u00a0Traon, J. Klein, L. Cavallaro, Euphony: Harmonious unification of cacophonous anti-virus vendor labels for android malware, in: Proceedings of the 2017 IEEE\/ACM 14th International Conference on Mining Software Repositories MSR, 20-21 2017, Buenos Aires, Argentina, pp. 425\u2013435.","DOI":"10.1109\/MSR.2017.57"},{"key":"10.1016\/j.icte.2021.10.001_b20","doi-asserted-by":"crossref","unstructured":"S. Sebasti\u00e1n, J. Caballero, AVclass2: Massive malware tag extraction from AV labels, in: Proceedings of the 37th Annual Computer Security Applications Conference, ACSAC, 7-11 2020, Austin, USA, pp. 42\u201353.","DOI":"10.1145\/3427228.3427261"},{"key":"10.1016\/j.icte.2021.10.001_b21","unstructured":"E. Damiani, S.D.C. di\u00a0Vimercati, S. Paraboschi, P. Samarati, An open digest-based technique for spam detection, in: Proceedings of the ISCA 17th International Conference on Parallel and Distributed Computing Systems, PDCS, 15-17 2004, San Francisco, California, USA, pp. 559\u2013564."},{"key":"10.1016\/j.icte.2021.10.001_b22","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1016\/j.diin.2006.06.015","article-title":"Identifying almost identical files using context triggered piecewise hashing","volume":"3","author":"Kornblum","year":"2006","journal-title":"Digit. Investig."},{"key":"10.1016\/j.icte.2021.10.001_b23","series-title":"SimHash: Hash-Based Similarity Detection","author":"Sadowski","year":"2007"},{"key":"10.1016\/j.icte.2021.10.001_b24","doi-asserted-by":"crossref","unstructured":"V. Roussev, Data fingerprinting with similarity digests, in: Proceedings of the 6th Annual IFIP WG 11.9 International Conference on Digital Forensics, 4-6 2010, Hong Kong, China, pp. 207\u2013226.","DOI":"10.1007\/978-3-642-15506-2_15"},{"key":"10.1016\/j.icte.2021.10.001_b25","doi-asserted-by":"crossref","unstructured":"J. Oliver, C. Cheng, Y. Chen, TLSH - A locality sensitive hash, in: Proceedings of the 2013 Fourth Cybercrime and Trustworthy Computing Workshop, CTC, 21-22 2013, Sydney, NSW, Australia, pp. 7\u201313.","DOI":"10.1109\/CTC.2013.9"},{"key":"10.1016\/j.icte.2021.10.001_b26","doi-asserted-by":"crossref","first-page":"34","DOI":"10.1016\/j.diin.2017.12.004","article-title":"Distance, an effective alternative to ssdeep and sdhash","volume":"24","author":"Raff","year":"2018","journal-title":"Digit. Investig."},{"key":"10.1016\/j.icte.2021.10.001_b27","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1016\/j.diin.2019.04.006","article-title":"Fbhash: A new similarity hashing scheme for digital forensics","volume":"29","author":"Chang","year":"2019","journal-title":"Digital Investig."},{"issue":"4","key":"10.1016\/j.icte.2021.10.001_b28","doi-asserted-by":"crossref","first-page":"503","DOI":"10.3390\/math8040503","article-title":"An improved bytewise approximate matching algorithm suitable for files of dissimilar sizes","volume":"8","author":"Martinez","year":"2020","journal-title":"Mathematics"},{"key":"10.1016\/j.icte.2021.10.001_b29","series-title":"Detecting the Theft of Programs Using Birthmarks","author":"Tamada","year":"2003"},{"key":"10.1016\/j.icte.2021.10.001_b30","doi-asserted-by":"crossref","unstructured":"G. Myles, C. Collberg, K-gram based software birthmarks, in: Proceedings of the 20th ACM symposium on Applied computing, SAC, 13-17 2005, Santa Fe, New Mexico, pp. 314\u2013318.","DOI":"10.1145\/1066677.1066753"},{"key":"10.1016\/j.icte.2021.10.001_b31","doi-asserted-by":"crossref","unstructured":"J. Ko, H. Shim, D. Kim, Y.S. Jeong, S.J. Cho, M. Park, S. Han, S.B. Kim, Measuring similarity of android applications via reversing and K-gram birthmarking, in: Proceedings of the 2013 Research in Adaptive and Convergent Systems, RACS, 1-4 2013, Montreal, Quebec, Canada, pp. 336\u2013341.","DOI":"10.1145\/2513228.2513308"},{"key":"10.1016\/j.icte.2021.10.001_b32","doi-asserted-by":"crossref","unstructured":"S. Lee, W. Jung, S. Kim, E.T. Kim, Android malware similarity clustering using method based opcode sequence and jaccard index, in: Proceedings of the 2019 International Conference on Information and Communication Technology Convergence, ICTC, 16-18 2019, Jeju, Korea, pp. 178\u2013183.","DOI":"10.1109\/ICTC46691.2019.8939894"},{"issue":"4","key":"10.1016\/j.icte.2021.10.001_b33","doi-asserted-by":"crossref","first-page":"3905","DOI":"10.1007\/s13369-019-03718-9","article-title":"Software birthmark design and estimation: A systematic literature review","volume":"44","author":"Nazir","year":"2019","journal-title":"Arab. J. Sci. Eng."},{"key":"10.1016\/j.icte.2021.10.001_b34","article-title":"Dexofuzzy: Android malware similarity clustering method using opcode sequence","author":"Lee","year":"2019","journal-title":"Virus Bull."},{"issue":"168","key":"10.1016\/j.icte.2021.10.001_b35","article-title":"Approximate matching: Definition and terminology","volume":"800","author":"Breitinger","year":"2014","journal-title":"NIST Spec. Publ."},{"key":"10.1016\/j.icte.2021.10.001_b36","article-title":"Bringing order to approximate matching: Classification and attacks on similarity digest algorithms, forensic science international","volume":"36","author":"Mart\u00edn-P\u00e9rez","year":"2021","journal-title":"Digit. Invest."},{"key":"10.1016\/j.icte.2021.10.001_b37","doi-asserted-by":"crossref","unstructured":"K.H. Lee, W.J. Park, K.S. Cho, W. Ryu, RealCatch: A community-based real-time platform for financial fraud protection on smartphones, in: Proceedings of the 2014 International Conference on Information and Communication Technology Convergence, ICTC, 22-24 2014, Busan, Korea, pp. 362\u2013366.","DOI":"10.1109\/ICTC.2014.6983155"},{"key":"10.1016\/j.icte.2021.10.001_b38","article-title":"Optimizing ssdeep for use at scale","author":"Wallace","year":"2015","journal-title":"Virus Bull."},{"issue":"383","key":"10.1016\/j.icte.2021.10.001_b39","doi-asserted-by":"crossref","first-page":"553","DOI":"10.1080\/01621459.1983.10478008","article-title":"A method for comparing two hierarchical clusterings","volume":"78","author":"Fowlkes","year":"1983","journal-title":"J. Amer. Statist. Assoc."},{"issue":"336","key":"10.1016\/j.icte.2021.10.001_b40","doi-asserted-by":"crossref","first-page":"846","DOI":"10.1080\/01621459.1971.10482356","article-title":"Objective criteria for the evaluation of clustering methods","volume":"66","author":"Rand","year":"1971","journal-title":"J. Amer. Statist. Assoc."},{"key":"10.1016\/j.icte.2021.10.001_b41","doi-asserted-by":"crossref","unstructured":"J.M. Santos, M. Embrechts, On the use of the adjusted rand index as a metric for evaluating supervised classification, in: Proceedings of the 19th International Conference on Artificial Neural Networks, ICANN, 14-17 2009, Limassol, Cyprus, pp. 175\u2013184.","DOI":"10.1007\/978-3-642-04277-5_18"},{"key":"10.1016\/j.icte.2021.10.001_b42","first-page":"2837","article-title":"Information theoretic measures for clusterings comparison: Variants, properties, normalization and correction for chance","volume":"11","author":"Vinh","year":"2010","journal-title":"J. Mach. Learn. Res."},{"issue":"1","key":"10.1016\/j.icte.2021.10.001_b43","doi-asserted-by":"crossref","first-page":"193","DOI":"10.1007\/BF01908075","article-title":"Comparing partitions","volume":"2","author":"Hubert","year":"1985","journal-title":"J. Classification"}],"container-title":["ICT Express"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S2405959521001351?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S2405959521001351?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2023,1,13]],"date-time":"2023-01-13T20:08:12Z","timestamp":1673640492000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S2405959521001351"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9]]},"references-count":43,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2022,9]]}},"alternative-id":["S2405959521001351"],"URL":"https:\/\/doi.org\/10.1016\/j.icte.2021.10.001","relation":{},"ISSN":["2405-9595"],"issn-type":[{"value":"2405-9595","type":"print"}],"subject":[],"published":{"date-parts":[[2022,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Android malware dataset construction methodology to minimize bias\u2013variance\u200b tradeoff","name":"articletitle","label":"Article Title"},{"value":"ICT Express","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.icte.2021.10.001","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2021 The Author(s). Published by Elsevier B.V. on behalf of The Korean Institute of Communications and Information Sciences.","name":"copyright","label":"Copyright"}]}}