{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,11,19]],"date-time":"2024-11-19T18:43:35Z","timestamp":1732041815027},"reference-count":91,"publisher":"Springer Science and Business Media LLC","license":[{"start":{"date-parts":[[2022,8,30]],"date-time":"2022-08-30T00:00:00Z","timestamp":1661817600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,8,30]],"date-time":"2022-08-30T00:00:00Z","timestamp":1661817600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Inf Syst Front"],"DOI":"10.1007\/s10796-022-10333-x","type":"journal-article","created":{"date-parts":[[2022,8,30]],"date-time":"2022-08-30T12:02:51Z","timestamp":1661860971000},"update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Deep Reinforcement Learning in the Advanced Cybersecurity Threat Detection and Protection"],"prefix":"10.1007","author":[{"ORCID":"http:\/\/orcid.org\/0000-0001-8375-5713","authenticated-orcid":false,"given":"Mohit","family":"Sewak","sequence":"first","affiliation":[]},{"given":"Sanjay K.","family":"Sahay","sequence":"additional","affiliation":[]},{"given":"Hemant","family":"Rathore","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,8,30]]},"reference":[{"key":"10333_CR1","doi-asserted-by":"publisher","unstructured":"Abu Rajab, M., Zarfoss, J., Monrose, F., & Terzis, A. (2006). A multifaceted approach to understanding the botnet phenomenon. In Proceedings of the 6th ACM SIGCOMM conference on internet measurement (p. 41\u201352). Association for Computing Machinery. https:\/\/doi.org\/10.1145\/1177080.1177086","DOI":"10.1145\/1177080.1177086"},{"key":"10333_CR2","doi-asserted-by":"publisher","first-page":"52138","DOI":"10.1109\/ACCESS.2018.2870052","volume":"6","author":"A Adadi","year":"2018","unstructured":"Adadi, A., & Berrada, M. (2018). Peeking inside the black-box: A survey on explainable artificial intelligence (XAI). IEEE Access, 6, 52138\u201352160. https:\/\/doi.org\/10.1109\/ACCESS.2018.2870052.","journal-title":"IEEE Access"},{"key":"10333_CR3","unstructured":"Anderson, H. S., Kharkar, A., Filar, B., & Roth, P. (2017). Evading machine learning malware detection. Black Hat"},{"issue":"4","key":"10333_CR4","doi-asserted-by":"publisher","first-page":"1975","DOI":"10.1109\/TNSM.2020.3031843","volume":"17","author":"G Apruzzese","year":"2020","unstructured":"Apruzzese, G., Andreolini, M., Marchetti, M., Venturi, A., & Colajanni, M. (2020). Deep reinforcement adversarial learning against botnet evasion attacks. IEEE Transactions on Network and Service Management, 17(4), 1975\u20131987. https:\/\/doi.org\/10.1109\/TNSM.2020.3031843.","journal-title":"IEEE Transactions on Network and Service Management"},{"key":"10333_CR5","doi-asserted-by":"publisher","unstructured":"Arjoune, Y., & Faruque, S. (2020). Smart jamming attacks in 5g new radio: A review. In: 2020 10th annual computing and communication workshop and conference (CCWC) (pp. 1010\u20131015). https:\/\/doi.org\/10.1109\/CCWC47524.2020.9031175","DOI":"10.1109\/CCWC47524.2020.9031175"},{"key":"10333_CR6","doi-asserted-by":"publisher","unstructured":"Athiwaratkun, B., & Stokes, J. W. (2017). Malware classification with lstm and gru language models and a character-level cnn. In 2017 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 2482\u20132486). https:\/\/doi.org\/10.1109\/ICASSP.2017.7952603","DOI":"10.1109\/ICASSP.2017.7952603"},{"key":"10333_CR7","doi-asserted-by":"publisher","first-page":"757","DOI":"10.1016\/j.procs.2015.10.114","volume":"70","author":"CK Behera","year":"2015","unstructured":"Behera, C. K., & Bhaskari, D. L. (2015). Different obfuscation techniques for code protection. Procedia Computer Science, 70, 757\u2013763.","journal-title":"Procedia Computer Science"},{"key":"10333_CR8","doi-asserted-by":"publisher","unstructured":"Berman, D. S., Buczak, A. L., Chavis, J. S., & Corbett, C. L. (2019). A survey of deep learning methods for cyber security. Information, 10(4). https:\/\/doi.org\/10.3390\/info10040122","DOI":"10.3390\/info10040122"},{"issue":"1","key":"10333_CR9","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1109\/SURV.2013.052213.00046","volume":"16","author":"MH Bhuyan","year":"2014","unstructured":"Bhuyan, M. H., Bhattacharyya, D. K., & Kalita, J. K. (2014). Network anomaly detection: Methods, systems and tools. IEEE Communications Surveys Tutorials, 16(1), 303\u2013336. https:\/\/doi.org\/10.1109\/SURV.2013.052213.00046.","journal-title":"IEEE Communications Surveys Tutorials"},{"key":"10333_CR10","doi-asserted-by":"publisher","unstructured":"Birman, Y., Hindi, S., Katz, G., & Shabtai, A. (2020). Cost-effective malware detection as a service over serverless cloud using deep reinforcement learning. In: 2020 20th IEEE\/ACM international symposium on cluster, cloud and internet computing (CCGRID) (pp. 420\u2013429). https:\/\/doi.org\/10.1109\/CCGrid49817.2020.00-51","DOI":"10.1109\/CCGrid49817.2020.00-51"},{"key":"10333_CR11","doi-asserted-by":"publisher","unstructured":"Bridges, R. A., Glass-Vanderlan, T. R., Iannacone, M. D., Vincent, M. S., & Chen, Q. G. (2019). A survey of intrusion detection systems leveraging host data. ACM Computing Surveys, 52(6). https:\/\/doi.org\/10.1145\/3344382","DOI":"10.1145\/3344382"},{"issue":"2","key":"10333_CR12","doi-asserted-by":"publisher","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","volume":"38","author":"L Busoniu","year":"2008","unstructured":"Busoniu, L., Babuska, R., & De Schutter, B. (2008). A comprehensive survey of multiagent reinforcement learning. IEEE Transactions on Systems, Man, and Cybernetics, Part C (Applications and Reviews), 38(2), 156\u2013172. https:\/\/doi.org\/10.1109\/TSMCC.2007.913919.","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics, Part C (Applications and Reviews)"},{"key":"10333_CR13","doi-asserted-by":"publisher","unstructured":"Cai, Y., Shi, K., Song, F., Xu, Y., Wang, X., & Luan, H. (2019). Jamming pattern recognition using spectrum waterfall: A deep learning method. In 2019 IEEE 5th international conference on computer and communications (ICCC) (pp. 2113\u20132117). https:\/\/doi.org\/10.1109\/ICCC47050.2019.9064207","DOI":"10.1109\/ICCC47050.2019.9064207"},{"key":"10333_CR14","doi-asserted-by":"publisher","unstructured":"Chalaki, B., Beaver, L. E., Remer, B., Jang, K., Vinitsky, E., Bayen, A. M., & Malikopoulos, A. A. (2020). Zero-shot autonomous vehicle policy transfer: From simulation to real-world via adversarial learning. In 2020 IEEE 16th international conference on control & automation (ICCA) (pp. 35\u201340). https:\/\/doi.org\/10.1109\/ICCA51439.2020.9264552","DOI":"10.1109\/ICCA51439.2020.9264552"},{"key":"10333_CR15","doi-asserted-by":"publisher","unstructured":"Chen, Y., Li, Y., Xu, D., & Xiao, L. (2018). Dqn-based power control for iot transmission against jamming. In 2018 IEEE 87th vehicular technology conference (VTC Spring) (pp. 1\u20135). https:\/\/doi.org\/10.1109\/VTCSpring.2018.8417695","DOI":"10.1109\/VTCSpring.2018.8417695"},{"key":"10333_CR16","doi-asserted-by":"crossref","unstructured":"Chen, Y., Li, Y., Xu, D., & Xiao, L. (2018). Dqn-based power control for iot transmission against jamming. In 2018 IEEE 87th vehicular technology conference (VTC Spring) (pp. 1\u20135). IEEE","DOI":"10.1109\/VTCSpring.2018.8417695"},{"key":"10333_CR17","unstructured":"Chow, Y., & Ghavamzadeh, M. (2014). Algorithms for cvar optimization in mdps. In Advances in neural information processing systems (NIPS) (pp. 3509\u20133517)"},{"key":"10333_CR18","unstructured":"Das, A., & Rad, P. (2020). Opportunities and challenges in explainable artificial intelligence (XAI): A survey. arXiv:2006.11371"},{"key":"10333_CR19","unstructured":"David, W. (2019). UNSW-NB15 datasets. https:\/\/www.kaggle.com\/mrwellsdavid\/unsw-nb15. Accessed 2021 June 27"},{"key":"10333_CR20","unstructured":"Dazeley, R., Vamplew, P., & Cruz, F. (2021). Explainable reinforcement learning for broad-XAI: A Conceptual framework and survey. arXiv:2108.09003"},{"key":"10333_CR21","doi-asserted-by":"publisher","first-page":"48867","DOI":"10.1109\/ACCESS.2019.2908033","volume":"7","author":"Z Fang","year":"2019","unstructured":"Fang, Z., Wang, J., Li, B., Wu, S., Zhou, Y., & Huang, H. (2019). Evading anti-malware engines with deep reinforcement learning. IEEE Access, 7, 48867\u201348879. https:\/\/doi.org\/10.1109\/ACCESS.2019.2908033.","journal-title":"IEEE Access"},{"key":"10333_CR22","unstructured":"Firstbrook, P., Hallawell, A., Girard, J., & MacDonald, N. (2009). Magic quadrant for endpoint protection platforms. Gartner RAS Core Research Note G, 208912"},{"key":"10333_CR23","doi-asserted-by":"publisher","first-page":"11","DOI":"10.35377\/saucis.04.01.834048","volume":"4","author":"HG G\u00fclmez","year":"2021","unstructured":"G\u00fclmez, H. G., & Ang\u0131n, P. (2021). A study on the efficacy of deep reinforcement learning for intrusion detection. Sakarya University Journal of Computer and Information Sciences, 4, 11\u201325. https:\/\/doi.org\/10.35377\/saucis.04.01.834048.","journal-title":"Sakarya University Journal of Computer and Information Sciences"},{"key":"10333_CR24","doi-asserted-by":"publisher","unstructured":"Han, Y., Rubinstein, B. I., Abraham, T., Alpcan, T., Vel, O. D., Erfani, S., Hubczenko, D., Leckie, C., & Montague, P. (2018). Reinforcement learning for autonomous defence in software-defined networking. In International conference on decision and game theory for security (pp. 145\u2013165). Springer . https:\/\/doi.org\/10.1007\/978-3-030-01554-1_9","DOI":"10.1007\/978-3-030-01554-1_9"},{"key":"10333_CR25","doi-asserted-by":"publisher","unstructured":"Han, G., Xiao, L., & Poor, H. V. (2017). Two-dimensional anti-jamming communication based on deep reinforcement learning. In 2017 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 2087\u20132091). IEEE. https:\/\/doi.org\/10.1109\/ICASSP.2017.7952524","DOI":"10.1109\/ICASSP.2017.7952524"},{"key":"10333_CR26","doi-asserted-by":"crossref","unstructured":"Han, G., Xiao, L., & Poor, H. V. (2017). Two-dimensional anti-jamming communication based on deep reinforcement learning. In: 2017 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 2087\u20132091). IEEE","DOI":"10.1109\/ICASSP.2017.7952524"},{"key":"10333_CR27","doi-asserted-by":"publisher","unstructured":"Heady, R., Luger, G., Maccabe, A., & Servilla, M. (1990). The architecture of a network level intrusion detection system. Office of Scientific and Technical Information, U.S: Department of Energy. https:\/\/doi.org\/10.2172\/425295.","DOI":"10.2172\/425295"},{"key":"10333_CR28","doi-asserted-by":"publisher","unstructured":"Hsu, Y. F., & Matsuoka, M. (2020). A deep reinforcement learning approach for anomaly network intrusion detection system. In 2020 IEEE 9th international conference on cloud networking (CloudNet) (pp. 1\u20136). https:\/\/doi.org\/10.1109\/CloudNet51028.2020.9335796","DOI":"10.1109\/CloudNet51028.2020.9335796"},{"key":"10333_CR29","unstructured":"Hu, W., & Tan, Y. (2017). Generating adversarial malware examples for black-box attacks based on gan. arXiv:1702.05983"},{"key":"10333_CR30","doi-asserted-by":"publisher","unstructured":"Kienzle, D. M., & Elder, M. C. (2003). Recent worms: A survey and trends. In Proceedings of the 2003 ACM workshop on rapid malcode, WORM \u201903, (p. 1\u201310). Association for Computing Machinery, New York, NY, USA . https:\/\/doi.org\/10.1145\/948187.948189","DOI":"10.1145\/948187.948189"},{"key":"10333_CR31","doi-asserted-by":"publisher","unstructured":"Lakshmi, V. (2019). Beginning Security with Microsoft Technologies. Springer. https:\/\/doi.org\/10.1007\/978-1-4842-4853-9.","DOI":"10.1007\/978-1-4842-4853-9"},{"key":"10333_CR32","unstructured":"Leibo, J. Z., Due\u00f1ez-Guzman, E. A., Vezhnevets, A., Agapiou, J. P., Sunehag, P., Koster, R., Matyas, J., Beattie, C., Mordatch, I., & Graepel, T. (2021). Scalable evaluation of multi-agent reinforcement learning with melting pot. In International conference on machine learning (pp. 6187\u20136199). PMLR"},{"key":"10333_CR33","doi-asserted-by":"publisher","unstructured":"Li, Y., Liu, J., Li, Q., & Xiao, L. (2015). Mobile cloud offloading for malware detections with learning. In 2015 IEEE conference on computer communications workshops (INFOCOM WKSHPS) (pp. 197\u2013201). https:\/\/doi.org\/10.1109\/INFCOMW.2015.7179384","DOI":"10.1109\/INFCOMW.2015.7179384"},{"issue":"1","key":"10333_CR34","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1016\/j.jnca.2012.09.004","volume":"36","author":"HJ Liao","year":"2013","unstructured":"Liao, H. J., Richard Lin, C. H., Lin, Y. C., & Tung, K. Y. (2013). Intrusion detection system: A comprehensive review. Journal of Network and Computer Applications, 36(1), 16\u201324. https:\/\/doi.org\/10.1016\/j.jnca.2012.09.004.","journal-title":"Journal of Network and Computer Applications"},{"key":"10333_CR35","doi-asserted-by":"crossref","unstructured":"Lin, Z., Shi, Y., & Xue, Z. (2022). Idsgan: Generative adversarial networks for attack generation against intrusion detection. In Pacific-asia conference on knowledge discovery and data mining (pp. 79\u201391). Springer","DOI":"10.1007\/978-3-031-05981-0_7"},{"key":"10333_CR36","unstructured":"Liu, S. (2020). Endpoint detection and response (EDR) and endpoint protection platform (EPP) market size worldwide from 2015 to 2020 . https:\/\/www.statista.com\/statistics\/799060\/worldwideedr-epp-market-size\/ . Accessed 2021 June 27"},{"key":"10333_CR37","doi-asserted-by":"publisher","unstructured":"Liu, Y., Dong, M., Ota, K., Li, J., & Wu, J. (2018). Deep reinforcement learning based smart mitigation of ddos flooding in software-defined networks. In 2018 IEEE 23rd international workshop on computer aided modeling and design of communication links and networks (CAMAD) (pp. 1\u20136). https:\/\/doi.org\/10.1109\/CAMAD.2018.8514971","DOI":"10.1109\/CAMAD.2018.8514971"},{"issue":"5","key":"10333_CR38","doi-asserted-by":"publisher","first-page":"998","DOI":"10.1109\/LCOMM.2018.2815018","volume":"22","author":"X Liu","year":"2018","unstructured":"Liu, X., Xu, Y., Jia, L., Wu, Q., & Anpalagan, A. (2018). Anti-jamming communications using spectrum waterfall: A deep reinforcement learning approach. IEEE Communications Letters, 22(5), 998\u20131001. https:\/\/doi.org\/10.1109\/LCOMM.2018.2815018.","journal-title":"IEEE Communications Letters"},{"issue":"5","key":"10333_CR39","doi-asserted-by":"publisher","first-page":"998","DOI":"10.1109\/LCOMM.2018.2815018","volume":"22","author":"X Liu","year":"2018","unstructured":"Liu, X., Xu, Y., Jia, L., Wu, Q., & Anpalagan, A. (2018). Anti-jamming communications using spectrum waterfall: A deep reinforcement learning approach. IEEE Communications Letters, 22(5), 998\u20131001.","journal-title":"IEEE Communications Letters"},{"key":"10333_CR40","doi-asserted-by":"publisher","first-page":"112963","DOI":"10.1016\/j.eswa.2019.112963","volume":"141","author":"M Lopez-Martin","year":"2020","unstructured":"Lopez-Martin, M., Carro, B., & Sanchez-Esguevillas, A. (2020). Application of deep reinforcement learning to intrusion detection for supervised problems. Expert Systems with Applications, 141, 112963. https:\/\/doi.org\/10.1016\/j.eswa.2019.112963.","journal-title":"Expert Systems with Applications"},{"key":"10333_CR41","doi-asserted-by":"publisher","first-page":"270","DOI":"10.1016\/j.engappai.2015.01.013","volume":"41","author":"K Malialis","year":"2015","unstructured":"Malialis, K., & Kudenko, D. (2015). Distributed response to network intrusions using multiagent reinforcement learning. Engineering Applications of Artificial Intelligence, 41, 270\u2013284. https:\/\/doi.org\/10.1016\/j.engappai.2015.01.013.","journal-title":"Engineering Applications of Artificial Intelligence"},{"key":"10333_CR42","unstructured":"Mnih, V., Badia, A. P., Mirza, M., Graves, A., Lillicrap, T., Harley, T., Silver, D., & Kavukcuoglu, K. (2016) Asynchronous methods for deep reinforcement learning. In International conference on machine learning (pp. 1928\u20131937). PMLR"},{"issue":"7540","key":"10333_CR43","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A. A., Veness, J., Bellemare, M. G., et al. (2015). Human-level control through deep reinforcement learning. Nature, 518(7540), 529\u2013533. https:\/\/doi.org\/10.1038\/nature14236.","journal-title":"Nature"},{"key":"10333_CR44","unstructured":"Mohi-ud din, G. (2017). NSL-KDD dataset. https:\/\/www.unb.ca\/cic\/datasets\/nsl.html. Accessed 2020 June 27"},{"issue":"1","key":"10333_CR45","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1007\/s10207-014-0248-7","volume":"14","author":"A Nappa","year":"2015","unstructured":"Nappa, A., Rafique, M. Z., & Caballero, J. (2015). The malicia dataset: identification and analysis of drive-by download operations. International Journal of Information Security, 14(1), 15\u201333.","journal-title":"International Journal of Information Security"},{"key":"10333_CR46","doi-asserted-by":"publisher","unstructured":"Nguyen, T. T., & Reddi, V. J. (2021). Deep reinforcement learning for cyber security. IEEE Transactions on Neural Networks and Learning Systems, pp. 1\u201317. https:\/\/doi.org\/10.1109\/TNNLS.2021.3121870","DOI":"10.1109\/TNNLS.2021.3121870"},{"key":"10333_CR47","unstructured":"OroojlooyJadid, A., & Hajinezhad, D. (2019). A review of cooperative multi-agent deep reinforcement learning. arXiv:1908.03963"},{"issue":"10","key":"10333_CR48","doi-asserted-by":"publisher","first-page":"1345","DOI":"10.1109\/TKDE.2009.191","volume":"22","author":"SJ Pan","year":"2010","unstructured":"Pan, S. J., & Yang, Q. (2010). A survey on transfer learning. IEEE Transactions on Knowledge and Data Engineering, 22(10), 1345\u20131359. https:\/\/doi.org\/10.1109\/TKDE.2009.191.","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"issue":"2","key":"10333_CR49","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1839667.1839672","volume":"7","author":"D Pao","year":"2010","unstructured":"Pao, D., Lin, W., & Liu, B. (2010). A memory-efficient pipelined implementation of the aho-corasick string-matching algorithm. ACM Transactions on Architecture and Code Optimization (TACO), 7(2), 1\u201327.","journal-title":"ACM Transactions on Architecture and Code Optimization (TACO)"},{"key":"10333_CR50","doi-asserted-by":"crossref","unstructured":"Rathore, H., Nikam, P., Sahay, S. K., & Sewak, M. (2021a). Identification of adversarial android intents using reinforcement learning. In 2021 international joint conference on neural networkks (IJCNN), (pp. 1\u20138). IEEE","DOI":"10.1109\/IJCNN52387.2021.9534142"},{"key":"10333_CR51","doi-asserted-by":"crossref","unstructured":"Rathore, H., Sahay, S. K., Rajvanshi, R., & Sewak, M. (2020a). Identification of significant permissions for efficient android malware detection. In International conference on broadband communications, networks and systems, (pp. 33\u201352). Springer","DOI":"10.1007\/978-3-030-68737-3_3"},{"key":"10333_CR52","doi-asserted-by":"crossref","unstructured":"Rathore, H., Sahay, S. K., Thukral, S., & Sewak, M. (2020b). Detection of malicious android applications: Classical machine learning vs. deep neural network integrated with clustering. In International conference on broadband communications, networks and systems (pp. 109\u2013128). Springer","DOI":"10.1007\/978-3-030-68737-3_7"},{"key":"10333_CR53","doi-asserted-by":"crossref","unstructured":"Rathore, H., Sharma, S. C., Sahay, S. K., & Sewak, M. (2022c). Are malware detection classifiers adversarially vulnerable to actor-critic based evasion attacks? EAI Endorsed Transactions on Scalable Information Systems pp. e74","DOI":"10.4108\/eai.31-5-2022.174087"},{"issue":"4","key":"10333_CR54","doi-asserted-by":"publisher","first-page":"867","DOI":"10.1007\/s10796-020-10083-8","volume":"23","author":"H Rathore","year":"2021","unstructured":"Rathore, H., Sahay, S. K., Nikam, P., & Sewak, M. (2021). Robust android malware detection system against adversarial attacks using q-learning. Information Systems Frontiers, 23(4), 867\u2013882.","journal-title":"Information Systems Frontiers"},{"key":"10333_CR55","first-page":"301183","volume":"37","author":"H Rathore","year":"2021","unstructured":"Rathore, H., Samavedhi, A., Sahay, S. K., & Sewak, M. (2021). Robust malware detection models: learning from adversarial attacks and defenses. Forensic Science International: Digital Investigation, 37, 301183.","journal-title":"Forensic Science International: Digital Investigation"},{"key":"10333_CR56","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., & Moritz, P. (2015). Trust region policy optimization. In International conference on machine learning (pp. 1889\u20131897). PMLR"},{"key":"10333_CR57","unstructured":"Schulman, J., Moritz, P., Levine, S., Jordan, M., & Abbeel, P. (2015). High-dimensional continuous control using generalized advantage estimation. arXiv:1506.02438"},{"key":"10333_CR58","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., & Klimov, O. (2017). Proximal policy optimization algorithms. arXiv:1707.06347"},{"key":"10333_CR59","doi-asserted-by":"publisher","unstructured":"Sethi, K., Edupuganti, S., Kumar, R., Bera, P., & Madhav, Y. (2020). A context-aware robust intrusion detection system: a reinforcement learning-based approach. International Journal of Information Security, 19,. https:\/\/doi.org\/10.1007\/s10207-019-00482-7.","DOI":"10.1007\/s10207-019-00482-7"},{"key":"10333_CR60","doi-asserted-by":"publisher","unstructured":"Sewak, M. (2019a). Coding the environment and mdp solution. In Deep reinforcement learning (pp. 29\u201349). Springer. https:\/\/doi.org\/10.1007\/978-981-13-8285-7_3","DOI":"10.1007\/978-981-13-8285-7_3"},{"key":"10333_CR61","doi-asserted-by":"publisher","unstructured":"Sewak, M. (2019b). Deep q network (dqn), double dqn, and dueling dqn. In Deep reinforcement learning (pp. 95\u2013108). Springer. https:\/\/doi.org\/10.1007\/978-981-13-8285-7_8","DOI":"10.1007\/978-981-13-8285-7_8"},{"key":"10333_CR62","doi-asserted-by":"publisher","unstructured":"Sewak, M. (2019d). Deterministic policy gradient and the ddpg. In Deep reinforcement learning (pp. 173\u2013184). Springer. https:\/\/doi.org\/10.1007\/978-981-13-8285-7_13","DOI":"10.1007\/978-981-13-8285-7_13"},{"key":"10333_CR63","doi-asserted-by":"publisher","unstructured":"Sewak, M. (2019e). Introduction to reinforcement learning. In Deep reinforcement learning (pp. 1\u201318). Springer. https:\/\/doi.org\/10.1007\/978-981-13-8285-7_1","DOI":"10.1007\/978-981-13-8285-7_1"},{"key":"10333_CR64","doi-asserted-by":"publisher","unstructured":"Sewak, M. (2019f). Policy-based reinforcement learning approaches. In Deep reinforcement learning (pp. 127\u2013140). Springer. https:\/\/doi.org\/10.1007\/978-981-13-8285-7_10","DOI":"10.1007\/978-981-13-8285-7_10"},{"key":"10333_CR65","doi-asserted-by":"publisher","unstructured":"Sewak, M., Sahay, S., & Rathore, H. (2020a). Value-approximation based deep reinforcement learning techniques: An overview. In 2020 IEEE 5th international conference on computing communication and automation (ICCCA) (pp. 379\u2013384). https:\/\/doi.org\/10.1109\/ICCCA49541.2020.9250787","DOI":"10.1109\/ICCCA49541.2020.9250787"},{"key":"10333_CR66","doi-asserted-by":"crossref","unstructured":"Sewak, M., Sahay, S. K., & Rathore, H. (2020b). Deepintent: Implicitintent based android ids with e2e deep learning architecture. In 2020 IEEE 31st annual international symposium on personal, indoor and mobile radio communications (pp. 1\u20136). IEEE","DOI":"10.1109\/PIMRC48278.2020.9217188"},{"key":"10333_CR67","doi-asserted-by":"publisher","unstructured":"Sewak, M., Sahay, S. K., & Rathore, H. (2020c). DOOM: a novel adversarial-drl-based op-code level metamorphic malware obfuscator for the enhancement of IDS. In UbiComp\/ISWC \u201920: 2020 ACM international joint conference on pervasive and ubiquitous computing and 2020 ACM international symposium on wearable computers, Virtual Event, Mexico, September 12-17, 2020 (pp. 131\u2013134). ACM. https:\/\/doi.org\/10.1145\/3410530.3414411","DOI":"10.1145\/3410530.3414411"},{"key":"10333_CR68","doi-asserted-by":"publisher","unstructured":"Sewak, M., Sahay, S. K., & Rathore, H. (2021b). Adversarialuscator: An adversarial-drl based obfuscator and metamorphic malware swarm generator. In International joint conference on neural networks (IJCNN 2021), (pp. 1\u20139.) IEEE . https:\/\/doi.org\/10.1109\/IJCNN52387.2021.9534016","DOI":"10.1109\/IJCNN52387.2021.9534016"},{"key":"10333_CR69","doi-asserted-by":"publisher","unstructured":"Sewak, M., Sahay, S. K., & Rathore, H. (2021c). Dro: A data-scarce mechanism to revolutionize the performance of dl-based security systems. In 46th IEEE Conference on Local Computer Networks (LCN 2021) (pp. 581\u2013588). IEEE . https:\/\/doi.org\/10.1109\/LCN52139.2021.9524929","DOI":"10.1109\/LCN52139.2021.9524929"},{"key":"10333_CR70","doi-asserted-by":"crossref","unstructured":"Sewak, M., Sahay, S. K., & Rathore, H. (2022). Policy-approximation based deep reinforcement learning techniques: an overview. Information and Communication Technology for Competitive Strategies (ICTCS 2020) (pp. 493\u2013507)","DOI":"10.1007\/978-981-16-0739-4_47"},{"issue":"1","key":"10333_CR71","doi-asserted-by":"publisher","first-page":"55","DOI":"10.14429\/dsj.71.15780","volume":"71","author":"M Sewak","year":"2021","unstructured":"Sewak, M., Sahay, S., & Rathore, H. (2021). Drldo a novel drl based de obfuscation system for defence against metamorphic malware. Defence Science Journal, 71(1), 55\u201365. https:\/\/doi.org\/10.14429\/dsj.71.15780. https:\/\/publications.drdo.gov.in\/ojs\/index.php\/dsj\/article\/view\/15780.","journal-title":"Defence Science Journal"},{"key":"10333_CR72","doi-asserted-by":"publisher","unstructured":"Sewak, M. (2019). Deep reinforcement learning - frontiers of artificial intelligence. Springer. https:\/\/doi.org\/10.1007\/978-981-13-8285-7.","DOI":"10.1007\/978-981-13-8285-7"},{"issue":"1","key":"10333_CR73","doi-asserted-by":"publisher","first-page":"182","DOI":"10.1166\/jctn.2020.8648","volume":"17","author":"M Sewak","year":"2020","unstructured":"Sewak, M., Sahay, S. K., & Rathore, H. (2020). An overview of deep learning architecture of deep neural networks and autoencoders. Journal of Computational and Theoretical Nanoscience, 17(1), 182\u2013188. https:\/\/doi.org\/10.1166\/jctn.2020.8648.","journal-title":"Journal of Computational and Theoretical Nanoscience"},{"key":"10333_CR74","doi-asserted-by":"publisher","unstructured":"Suwannalai, E., & Polprasert, C. (2020). Network intrusion detection systems using adversarial reinforcement learning with deep q-network. In 2020 18th international conference on ICT and knowledge engineering (ICT KE) (pp. 1\u20137). https:\/\/doi.org\/10.1109\/ICTKE50349.2020.9289884","DOI":"10.1109\/ICTKE50349.2020.9289884"},{"key":"10333_CR75","unstructured":"Teh, Y., Bapst, V., Czarnecki, W. M., Quan, J., Kirkpatrick, J., Hadsell, R., Heess, N., & Pascanu, R. (2017). Distral: Robust multitask reinforcement learning. Advances in Neural Information Processing Systems, 30"},{"issue":"1","key":"10333_CR76","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1109\/MDT.2010.7","volume":"27","author":"M Tehranipoor","year":"2010","unstructured":"Tehranipoor, M., & Koushanfar, F. (2010). A survey of hardware trojan taxonomy and detection. IEEE Design Test of Computers, 27(1), 10\u201325. https:\/\/doi.org\/10.1109\/MDT.2010.7.","journal-title":"IEEE Design Test of Computers"},{"issue":"11","key":"10333_CR77","doi-asserted-by":"publisher","first-page":"8693","DOI":"10.1109\/JIOT.2020.3040957","volume":"8","author":"A Uprety","year":"2021","unstructured":"Uprety, A., & Rawat, D. B. (2021). Reinforcement learning for iot security: A comprehensive survey. IEEE Internet of Things Journal, 8(11), 8693\u20138706. https:\/\/doi.org\/10.1109\/JIOT.2020.3040957.","journal-title":"IEEE Internet of Things Journal"},{"key":"10333_CR78","doi-asserted-by":"crossref","unstructured":"Van Hasselt, H., Guez, A., & Silver, D. (2016). Deep reinforcement learning with double q-learning. In Proceedings of the AAAI conference on artificial intelligence (Vol.\u00a030)","DOI":"10.1609\/aaai.v30i1.10295"},{"issue":"7782","key":"10333_CR79","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals, O., Babuschkin, I., Czarnecki, W. M., Mathieu, M., Dudzik, A., Chung, J., et al. (2019). Grandmaster level in starcraft ii using multi-agent reinforcement learning. Nature, 575(7782), 350\u2013354. https:\/\/doi.org\/10.1038\/s41586-019-1724-z.","journal-title":"Nature"},{"key":"10333_CR80","unstructured":"Wagner, O. (2022). Nearly 400 car crashes in 11 months involved automated tech, companies tell regulators. https:\/\/www.npr.org\/2022\/06\/15\/1105252793\/nearly-400-car-crashes-in-11-months-involved-automated-tech-companies-tell-regul#:~:text=Automated%20tech%20factored%20in%20392,11%20months%2C %20regulators%20report %20 %3A %20NPR &text=Press-,Automated %20tech%20factored%20in %20392 %20car %20crashes %20in %2011 %20months,July%202021%20to %20May %202022. Accessed 2022 July 8"},{"key":"10333_CR81","doi-asserted-by":"publisher","unstructured":"Wan, X., Sheng, G., Li, Y., Xiao, L., & Du, X. (2017). Reinforcement learning based mobile offloading for cloud-based malware detection. In IEEE Global Communications Conference (GLOBECOM 2017) (pp. 1\u20136). https:\/\/doi.org\/10.1109\/GLOCOM.2017.8254503","DOI":"10.1109\/GLOCOM.2017.8254503"},{"key":"10333_CR82","unstructured":"Wang, Z., Schaul, T., Hessel, M., Van Hasselt, H., Lanctot, M., & De\u00a0Freitas, N. (2016). Dueling network architectures for deep reinforcement learning. In International conference on machine learning (ICML 16) (pp. 1995\u20132003)"},{"key":"10333_CR83","doi-asserted-by":"publisher","unstructured":"Wang, Y., Stokes, J. W., & Marinescu, M. (2019). Neural malware control with deep reinforcement learning. In MILCOM 2019 - 2019 IEEE military communications conference (MILCOM) (pp. 1\u20138). https:\/\/doi.org\/10.1109\/MILCOM47813.2019.9020862","DOI":"10.1109\/MILCOM47813.2019.9020862"},{"key":"10333_CR84","doi-asserted-by":"publisher","unstructured":"Wells, L., & Bednarz, T. (2021). Explainable ai and reinforcement learning\u2013a systematic review of current approaches and trends. Frontiers in Artificial Intelligence, 4,. https:\/\/doi.org\/10.3389\/frai.2021.550030.","DOI":"10.3389\/frai.2021.550030"},{"key":"10333_CR85","unstructured":"Weng, L. (2019). Meta reinforcement learning. lilianweng. github. io\/lillog"},{"key":"10333_CR86","doi-asserted-by":"publisher","unstructured":"Wilson, A., Fern, A., Ray, S., & Tadepalli, P. (2007). Multi-task reinforcement learning: A hierarchical bayesian approach. In 24th international conference on machine learning (p. 1015\u20131022). https:\/\/doi.org\/10.1145\/1273496.1273624","DOI":"10.1145\/1273496.1273624"},{"key":"10333_CR87","doi-asserted-by":"crossref","unstructured":"Wu, D., Fang, B., Wang, J., Liu, Q., & Cui, X. (2019). Evading machine learning botnet detection models via deep reinforcement learning. In IEEE international conference on communications (ICC) (pp. 1\u20136). IEEE","DOI":"10.1109\/ICC.2019.8761337"},{"issue":"3","key":"10333_CR88","doi-asserted-by":"publisher","first-page":"542","DOI":"10.1109\/LCOMM.2018.2792015","volume":"22","author":"L Xiao","year":"2018","unstructured":"Xiao, L., Wan, X., Su, W., Tang, Y., et al. (2018). Anti-jamming underwater transmission with mobility and learning. IEEE Communications Letters, 22(3), 542\u2013545.","journal-title":"IEEE Communications Letters"},{"issue":"1","key":"10333_CR89","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1109\/TNET.2004.842221","volume":"13","author":"D Yau","year":"2005","unstructured":"Yau, D., Lui, J., Liang, F., & Yam, Y. (2005). Defending against distributed denial-of-service attacks with max-min fair server-centric router throttles. IEEE\/ACM Transactions on Networking, 13(1), 29\u201342. https:\/\/doi.org\/10.1109\/TNET.2004.842221.","journal-title":"IEEE\/ACM Transactions on Networking"},{"key":"10333_CR90","doi-asserted-by":"publisher","unstructured":"You, I., & Yim, K. (2010). Malware obfuscation techniques: A brief survey. In 2010 international conference on broadband, wireless computing, communication and applications (pp. 297\u2013300). https:\/\/doi.org\/10.1109\/BWCCA.2010.85","DOI":"10.1109\/BWCCA.2010.85"},{"key":"10333_CR91","first-page":"3562","volume":"31","author":"T Zahavy","year":"2018","unstructured":"Zahavy, T., Haroush, M., Merlis, N., Mankowitz, D. J., & Mannor, S. (2018). Learn what not to learn: Action elimination with deep reinforcement learning. Advances in Neural Information Processing Systems, 31, 3562\u20133573.","journal-title":"Advances in Neural Information Processing Systems"}],"container-title":["Information Systems Frontiers"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10796-022-10333-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10796-022-10333-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10796-022-10333-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,30]],"date-time":"2022-08-30T12:15:23Z","timestamp":1661861723000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10796-022-10333-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,30]]},"references-count":91,"alternative-id":["10333"],"URL":"https:\/\/doi.org\/10.1007\/s10796-022-10333-x","relation":{},"ISSN":["1387-3326","1572-9419"],"issn-type":[{"value":"1387-3326","type":"print"},{"value":"1572-9419","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,8,30]]},"assertion":[{"value":"15 August 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 August 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or non-financial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest"}}]}}