{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,8,31]],"date-time":"2024-08-31T06:34:33Z","timestamp":1725086073434},"reference-count":64,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Knowledge-Based Systems"],"published-print":{"date-parts":[[2021,6]]},"DOI":"10.1016\/j.knosys.2021.106967","type":"journal-article","created":{"date-parts":[[2021,3,18]],"date-time":"2021-03-18T14:42:34Z","timestamp":1616078554000},"page":"106967","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":8,"special_numbering":"C","title":["RL-VAEGAN: Adversarial defense for reinforcement learning agents via style transfer"],"prefix":"10.1016","volume":"221","author":[{"given":"Yueyue","family":"Hu","sequence":"first","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0001-7069-3752","authenticated-orcid":false,"given":"Shiliang","family":"Sun","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.knosys.2021.106967_b1","doi-asserted-by":"crossref","unstructured":"G. Zheng, F. Zhang, Z. Zheng, Y. Xiang, N.J. Yuan, X. Xie, Z. Li, DRN: A deep reinforcement learning framework for news recommendation, in: World Wide Web Conference, 2018, pp. 167\u2013176.","DOI":"10.1145\/3178876.3185994"},{"key":"10.1016\/j.knosys.2021.106967_b2","series-title":"Safe, multi-agent, reinforcement learning for autonomous driving","first-page":"1","author":"Shalev-Shwartz","year":"2016"},{"key":"10.1016\/j.knosys.2021.106967_b3","unstructured":"T.P. Lillicrap, J.J. Hunt, A. Pritzel, N. Heess, T. Erez, Y. Tassa, D. Silver, D. Wierstra, Continuous control with deep reinforcement learning, in: International Conference on Machine Learning, 2016, pp. 1\u201314."},{"key":"10.1016\/j.knosys.2021.106967_b4","unstructured":"Y. Duan, X. Chen, R. Houthooft, J. Schulman, P. Abbeel, Benchmarking deep reinforcement learning for continuous control, in: International Conference on Machine Learning, 2016, pp. 1329\u20131338."},{"key":"10.1016\/j.knosys.2021.106967_b5","series-title":"Tactics of adversarial attack on deep reinforcement learning agents","first-page":"1","author":"Lin","year":"2017"},{"key":"10.1016\/j.knosys.2021.106967_b6","series-title":"Robust Optimization, Vol. 28","author":"Ben-Tal","year":"2009"},{"key":"10.1016\/j.knosys.2021.106967_b7","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1016\/j.patcog.2018.07.023","article-title":"Wild patterns: Ten years after the rise of adversarial machine learning","volume":"84","author":"Biggio","year":"2018","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.knosys.2021.106967_b8","unstructured":"I. Goodfellow, J. Shlens, C. Szegedy, Explaining and harnessing adversarial examples, in: International Conference on Machine Learning, 2015, pp. 1\u201311."},{"key":"10.1016\/j.knosys.2021.106967_b9","unstructured":"C. Szegedy, W. Zaremba, I. Sutskever, J. Bruna, D. Erhan, I. Goodfellow, R. Fergus, Intriguing properties of neural networks, in: International Conference on Machine Learning, 2014, pp. 1\u201310."},{"key":"10.1016\/j.knosys.2021.106967_b10","doi-asserted-by":"crossref","first-page":"108","DOI":"10.1016\/j.knosys.2019.05.007","article-title":"Adversary resistant deep neural networks via advanced feature nullification","volume":"179","author":"Han","year":"2019","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.knosys.2021.106967_b11","doi-asserted-by":"crossref","unstructured":"R. Jia, P. Liang, Adversarial examples for evaluating reading comprehension systems, in: Conference on Empirical Methods in Natural Language Processing, 2017, pp. 1\u201311.","DOI":"10.18653\/v1\/D17-1215"},{"key":"10.1016\/j.knosys.2021.106967_b12","doi-asserted-by":"crossref","unstructured":"N. Carlini, D. Wagner, Audio adversarial examples: Targeted attacks on speech-to-text, in: IEEE Security and Privacy Workshops, 2018, pp. 1\u20137.","DOI":"10.1109\/SPW.2018.00009"},{"key":"10.1016\/j.knosys.2021.106967_b13","unstructured":"L. Schmidt, S. Santurkar, D. Tsipras, K. Talwar, A. Madry, Adversarially robust generalization requires more data, in: Advances in Neural Information Processing Systems, 2018, pp. 5014\u20135026."},{"key":"10.1016\/j.knosys.2021.106967_b14","series-title":"There is no free lunch in adversarial robustness (but there are unexpected benefits)","first-page":"1","author":"Dimitris\u00a0Tsipras","year":"2018"},{"key":"10.1016\/j.knosys.2021.106967_b15","unstructured":"S. Bubeck, Y.T. Lee, E. Price, I. Razenshteyn, Adversarial examples from computational constraints, in: International Conference on Machine Learning, 2019, pp. 831\u2013840."},{"key":"10.1016\/j.knosys.2021.106967_b16","series-title":"Advances in Neural Information Processing Systems","first-page":"125","article-title":"Adversarial examples are not bugs, they are features","author":"Ilyas","year":"2019"},{"key":"10.1016\/j.knosys.2021.106967_b17","series-title":"Understanding and mitigating the tradeoff between robustness and accuracy","first-page":"1","author":"Raghunathan","year":"2020"},{"key":"10.1016\/j.knosys.2021.106967_b18","unstructured":"J. Gilmer, L. Metz, F. Faghri, S.S. Schoenholz, M. Raghu, M. Wattenberg, I. Goodfellow, Adversarial spheres, in: Workshop of International Conference on Learning Representations, 2018, pp. 1\u201314."},{"key":"10.1016\/j.knosys.2021.106967_b19","series-title":"Adversarial attacks on neural network policies","first-page":"1","author":"Huang","year":"2017"},{"key":"10.1016\/j.knosys.2021.106967_b20","unstructured":"A. Pattanaik, Z. Tang, S. Liu, G. Bommannan, G. Chowdhary, Robust deep reinforcement learning with adversarial attacks, in: International Conference on Autonomous Agents and Multiagent Systems, 2018, pp. 2040\u20132042."},{"key":"10.1016\/j.knosys.2021.106967_b21","series-title":"Delving into adversarial attacks on deep policies","first-page":"1","author":"Kos","year":"2017"},{"key":"10.1016\/j.knosys.2021.106967_b22","series-title":"Robust adversarial reinforcement learning","first-page":"1","author":"Pinto","year":"2017"},{"key":"10.1016\/j.knosys.2021.106967_b23","doi-asserted-by":"crossref","unstructured":"X. Pan, D. Seita, Y. Gao, J. Canny, Risk averse robust adversarial reinforcement learning, in: International Conference on Robotics and Automation, 2019, pp. 8522\u20138528.","DOI":"10.1109\/ICRA.2019.8794293"},{"key":"10.1016\/j.knosys.2021.106967_b24","series-title":"Advances in Neural Information Processing Systems","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"Sutton","year":"2000"},{"issue":"7540","key":"10.1016\/j.knosys.2021.106967_b25","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"Mnih","year":"2015","journal-title":"Nature"},{"key":"10.1016\/j.knosys.2021.106967_b26","doi-asserted-by":"crossref","unstructured":"N. Carlini, D. Wagner, Towards evaluating the robustness of neural networks, in: IEEE Symposium on Security and Privacy, 2017, pp. 39\u201357.","DOI":"10.1109\/SP.2017.49"},{"key":"10.1016\/j.knosys.2021.106967_b27","series-title":"Ensemble adversarial training: Attacks and defenses","first-page":"1","author":"Tram\u00e8r","year":"2017"},{"key":"10.1016\/j.knosys.2021.106967_b28","series-title":"Delving into transferable adversarial examples and black-box attacks","first-page":"1","author":"Liu","year":"2016"},{"key":"10.1016\/j.knosys.2021.106967_b29","doi-asserted-by":"crossref","unstructured":"N. Papernot, P. McDaniel, I. Goodfellow, S. Jha, Z.B. Celik, A. Swami, Practical black-box attacks against machine learning, in: Proceedings of the ACM on Asia Conference on Computer and Communications Security, 2017, pp. 506\u2013519.","DOI":"10.1145\/3052973.3053009"},{"key":"10.1016\/j.knosys.2021.106967_b30","doi-asserted-by":"crossref","unstructured":"X. Huang, M. Liu, S. Belongie, J. Kautz, Multimodal unsupervised image-to-image translation, in: European Conference on Computer Vision, 2018, pp. 172\u2013189.","DOI":"10.1007\/978-3-030-01219-9_11"},{"key":"10.1016\/j.knosys.2021.106967_b31","doi-asserted-by":"crossref","unstructured":"J.-Y. Zhu, T. Park, P. Isola, A.A. Efros, Unpaired image-to-image translation using cycle-consistent adversarial networks, in: International Conference on Computer Vision, 2017, pp. 2223\u20132232.","DOI":"10.1109\/ICCV.2017.244"},{"key":"10.1016\/j.knosys.2021.106967_b32","doi-asserted-by":"crossref","unstructured":"L.A. Gatys, A.S. Ecker, M. Bethge, Image style transfer using convolutional neural networks, in: Conference on Computer Vision and Pattern, 2016, pp. 2414\u20132423.","DOI":"10.1109\/CVPR.2016.265"},{"key":"10.1016\/j.knosys.2021.106967_b33","unstructured":"J. Schulman, S. Levine, P. Abbeel, M. Jordan, P. Moritz, Trust region policy optimization, in: International Conference on Machine Learning, 2015, pp. 1889\u20131897."},{"key":"10.1016\/j.knosys.2021.106967_b34","unstructured":"V. Mnih, A.P. Badia, M. Mirza, A. Graves, T. Lillicrap, T. Harley, D. Silver, K. Kavukcuoglu, Asynchronous methods for deep reinforcement learning, in: International Conference on Machine Learning, 2016, pp. 1928\u20131937."},{"key":"10.1016\/j.knosys.2021.106967_b35","series-title":"International Conference on Machine Learning and Data Mining in Pattern Recognition","first-page":"262","article-title":"Vulnerability of deep reinforcement learning to policy induction attacks","author":"Behzadan","year":"2017"},{"key":"10.1016\/j.knosys.2021.106967_b36","series-title":"Adversarial examples in the physical world","first-page":"1","author":"Kurakin","year":"2016"},{"key":"10.1016\/j.knosys.2021.106967_b37","doi-asserted-by":"crossref","unstructured":"N. Papernot, P. McDaniel, S. Jha, M. Fredrikson, Z.B. Celik, A. Swami, The limitations of deep learning in adversarial settings, in: IEEE European Symposium on Security and Privacy, 2016, pp. 372\u2013387.","DOI":"10.1109\/EuroSP.2016.36"},{"key":"10.1016\/j.knosys.2021.106967_b38","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2020.104021","article-title":"Learning adversarial attack policies through multi-objective reinforcement learning","volume":"96","author":"Garc\u00eda","year":"2020","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.knosys.2021.106967_b39","series-title":"Sequential attacks on agents for long-term adversarial goals","first-page":"1","author":"Tretschk","year":"2018"},{"issue":"1","key":"10.1016\/j.knosys.2021.106967_b40","first-page":"1","article-title":"Learning to attack: Adversarial transformation networks","volume":"32","author":"Baluja","year":"2018","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"10.1016\/j.knosys.2021.106967_b41","series-title":"Trojdrl: Trojan attacks on deep reinforcement learning agents","first-page":"1","author":"Kiourti","year":"2019"},{"key":"10.1016\/j.knosys.2021.106967_b42","series-title":"Adversarial training methods for semi-supervised text classification","first-page":"1","author":"Miyato","year":"2016"},{"key":"10.1016\/j.knosys.2021.106967_b43","series-title":"International Conference on Decision and Game Theory for Security","first-page":"145","article-title":"Reinforcement learning for autonomous defence in software-defined networking","author":"Han","year":"2018"},{"key":"10.1016\/j.knosys.2021.106967_b44","series-title":"Whatever does not kill deep reinforcement learning, makes it stronger","first-page":"1","author":"Behzadan","year":"2017"},{"key":"10.1016\/j.knosys.2021.106967_b45","series-title":"International Conference on Computer Safety, Reliability, and Security","first-page":"406","article-title":"Mitigation of policy manipulation attacks on deep Q-networks with parameter-space noise","author":"Behzadan","year":"2018"},{"key":"10.1016\/j.knosys.2021.106967_b46","unstructured":"M. Fortunato, M.G. Azar, B. Piot, J. Menick, I. Osband, A. Graves, V. Mnih, R. Munos, D. Hassabis, O. Pietquin, et al. Noisy networks for exploration, in: International Conference on Learning Representations, 2017, pp. 1\u201321."},{"key":"10.1016\/j.knosys.2021.106967_b47","doi-asserted-by":"crossref","unstructured":"S. Moosavi-Dezfooli, A. Fawzi, O. Fawzi, P. Frossard, Universal adversarial perturbations, in: Conference on Computer Vision and Pattern, 2017, pp. 1765\u20131773.","DOI":"10.1109\/CVPR.2017.17"},{"key":"10.1016\/j.knosys.2021.106967_b48","series-title":"Wasserstein robust reinforcement learning","first-page":"1","author":"Abdullah","year":"2019"},{"key":"10.1016\/j.knosys.2021.106967_b49","series-title":"Convergence problems with generative adversarial networks (gans)","first-page":"1","author":"Barnett","year":"2018"},{"key":"10.1016\/j.knosys.2021.106967_b50","series-title":"Action robust reinforcement learning and applications in continuous control","first-page":"1","author":"Tessler","year":"2019"},{"key":"10.1016\/j.knosys.2021.106967_b51","series-title":"Improving robustness via risk averse distributional reinforcement learning","first-page":"1","author":"Singh","year":"2020"},{"key":"10.1016\/j.knosys.2021.106967_b52","series-title":"Detecting adversarial attacks on neural network policies with visual foresight","author":"Lin","year":"2017"},{"key":"10.1016\/j.knosys.2021.106967_b53","first-page":"9916","article-title":"Online robust policy learning in the presence of unknown adversaries","volume":"31","author":"Havens","year":"2018","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2021.106967_b54","series-title":"Generating adversarial examples with adversarial networks","first-page":"1","author":"Xiao","year":"2018"},{"key":"10.1016\/j.knosys.2021.106967_b55","doi-asserted-by":"crossref","unstructured":"S. Jandial, P. Mangla, S. Varshney, V. Balasubramanian, Advgan++: Harnessing latent layers for adversary generation, in: International Conference on Computer Vision Workshops, 2019, pp. 1\u20134.","DOI":"10.1109\/ICCVW.2019.00257"},{"key":"10.1016\/j.knosys.2021.106967_b56","series-title":"Generating natural adversarial examples","first-page":"1","author":"Zhao","year":"2017"},{"key":"10.1016\/j.knosys.2021.106967_b57","unstructured":"P. Samangouei, M. Kabkab, R. Chellappa, Defense-gan: Protecting classifiers against adversarial attacks using generative models, in: International Conference on Machine Learning, 2018, pp. 1\u201317."},{"key":"10.1016\/j.knosys.2021.106967_b58","series-title":"Advances in Neural Information Processing Systems","first-page":"700","article-title":"Unsupervised image-to-image translation networks","author":"Liu","year":"2017"},{"key":"10.1016\/j.knosys.2021.106967_b59","unstructured":"A.B.L. Larsen, S.K. S\u00f8nderby, H. Larochelle, O. Winther, Autoencoding beyond pixels using a learned similarity metric, in: International Conference on Machine Learning, 2016, pp. 1558\u20131566."},{"key":"10.1016\/j.knosys.2021.106967_b60","series-title":"Auto-encoding variational bayes","first-page":"1","author":"Kingma","year":"2013"},{"key":"10.1016\/j.knosys.2021.106967_b61","doi-asserted-by":"crossref","unstructured":"S. Rifai, P. Vincent, X. Muller, X. Glorot, Y. Bengio, Contractive auto-encoders: Explicit invariance during feature extraction, in: International Conference on Machine Learning, 2011, pp. 833\u2013840.","DOI":"10.1007\/978-3-642-23783-6_41"},{"key":"10.1016\/j.knosys.2021.106967_b62","series-title":"Advances in Neural Information Processing Systems","first-page":"2672","article-title":"Generative adversarial nets","author":"Goodfellow","year":"2014"},{"key":"10.1016\/j.knosys.2021.106967_b63","series-title":"Wasserstein gan","first-page":"1","author":"Arjovsky","year":"2017"},{"key":"10.1016\/j.knosys.2021.106967_b64","series-title":"Openai gym","first-page":"1","author":"Brockman","year":"2016"}],"container-title":["Knowledge-Based Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705121002306?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705121002306?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2023,3,11]],"date-time":"2023-03-11T14:23:08Z","timestamp":1678544588000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0950705121002306"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6]]},"references-count":64,"alternative-id":["S0950705121002306"],"URL":"https:\/\/doi.org\/10.1016\/j.knosys.2021.106967","relation":{},"ISSN":["0950-7051"],"issn-type":[{"value":"0950-7051","type":"print"}],"subject":[],"published":{"date-parts":[[2021,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"RL-VAEGAN: Adversarial defense for reinforcement learning agents via style transfer","name":"articletitle","label":"Article Title"},{"value":"Knowledge-Based Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.knosys.2021.106967","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2021 Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}],"article-number":"106967"}}