{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,11,19]],"date-time":"2024-11-19T19:04:14Z","timestamp":1732043054443,"version":"3.28.0"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","funder":[{"name":"National Science Foundation","award":["CNS-2154873"]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,16]]},"DOI":"10.1145\/3607199.3607242","type":"proceedings-article","created":{"date-parts":[[2023,10,3]],"date-time":"2023-10-03T22:30:51Z","timestamp":1696372251000},"page":"654-668","update-policy":"http:\/\/dx.doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":32,"title":["DiverseVul: A New Vulnerable Source Code Dataset for Deep Learning Based Vulnerability Detection"],"prefix":"10.1145","author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-2019-5955","authenticated-orcid":false,"given":"Yizheng","family":"Chen","sequence":"first","affiliation":[{"name":"University of Maryland, United States of America"}]},{"ORCID":"http:\/\/orcid.org\/0009-0006-5467-2592","authenticated-orcid":false,"given":"Zhoujie","family":"Ding","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, USA"}]},{"ORCID":"http:\/\/orcid.org\/0009-0000-4207-2369","authenticated-orcid":false,"given":"Lamya","family":"Alowain","sequence":"additional","affiliation":[{"name":"King Abdulaziz City for Science and Technology, Saudi Arabia"}]},{"ORCID":"http:\/\/orcid.org\/0000-0001-8379-902X","authenticated-orcid":false,"given":"Xinyun","family":"Chen","sequence":"additional","affiliation":[{"name":"Google Deepmind, United States of America"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-9944-9232","authenticated-orcid":false,"given":"David","family":"Wagner","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,10,16]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1646353.1646374"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3475960.3475985"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3540250.3549162"},{"volume-title":"Deep learning based vulnerability detection: Are we there yet","year":"2021","author":"Chakraborty Saikat","key":"e_1_3_2_1_4_1","unstructured":"Saikat Chakraborty, Rahul Krishna, Yangruibo Ding, and Baishakhi Ray. 2021. Deep learning based vulnerability detection: Are we there yet. IEEE Transactions on Software Engineering (2021)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3508398.3511495"},{"volume-title":"Last accessed on","year":"2023","author":"The\u00a0MITRE Corporation","key":"e_1_3_2_1_6_1","unstructured":"The\u00a0MITRE Corporation. Last accessed on March 28, 2023. 2022 CWE Top 25 Most Dangerous Software Weaknesses. https:\/\/cwe.mitre.org\/top25\/archive\/2022\/2022_cwe_top25.html"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00022"},{"volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","year":"2018","author":"Devlin Jacob","key":"e_1_3_2_1_8_1","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3379597.3387501"},{"volume-title":"Codebert: A pre-trained model for programming and natural languages. arXiv preprint arXiv:2002.08155","year":"2020","author":"Feng Zhangyin","key":"e_1_3_2_1_10_1","unstructured":"Zhangyin Feng, Daya Guo, Duyu Tang, Nan Duan, Xiaocheng Feng, Ming Gong, Linjun Shou, Bing Qin, Ting Liu, Daxin Jiang, 2020. Codebert: A pre-trained model for programming and natural languages. arXiv preprint arXiv:2002.08155 (2020)."},{"volume-title":"Graphcodebert: Pre-training code representations with data flow. arXiv preprint arXiv:2009.08366","year":"2020","author":"Guo Daya","key":"e_1_3_2_1_11_1","unstructured":"Daya Guo, Shuo Ren, Shuai Lu, Zhangyin Feng, Duyu Tang, Shujie Liu, Long Zhou, Nan Duan, Alexey Svyatkovskiy, Shengyu Fu, 2020. Graphcodebert: Pre-training code representations with data flow. arXiv preprint arXiv:2009.08366 (2020)."},{"volume-title":"Codesearchnet challenge: Evaluating the state of semantic code search. arXiv preprint arXiv:1909.09436","year":"2019","author":"Husain Hamel","key":"e_1_3_2_1_12_1","unstructured":"Hamel Husain, Ho-Hsiang Wu, Tiferet Gazit, Miltiadis Allamanis, and Marc Brockschmidt. 2019. Codesearchnet challenge: Evaluating the state of semantic code search. arXiv preprint arXiv:1909.09436 (2019)."},{"volume-title":"Gated graph sequence neural networks. arXiv preprint arXiv:1511.05493","year":"2015","author":"Li Yujia","key":"e_1_3_2_1_13_1","unstructured":"Yujia Li, Daniel Tarlow, Marc Brockschmidt, and Richard Zemel. 2015. Gated graph sequence neural networks. arXiv preprint arXiv:1511.05493 (2015)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TDSC.2021.3051525"},{"volume-title":"Vuldeepecker: A deep learning-based system for vulnerability detection. arXiv preprint arXiv:1801.01681","year":"2018","author":"Li Zhen","key":"e_1_3_2_1_15_1","unstructured":"Zhen Li, Deqing Zou, Shouhuai Xu, Xinyu Ou, Hai Jin, Sujuan Wang, Zhijun Deng, and Yuyi Zhong. 2018. Vuldeepecker: A deep learning-based system for vulnerability detection. arXiv preprint arXiv:1801.01681 (2018)."},{"volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","year":"2019","author":"Liu Yinhan","key":"e_1_3_2_1_16_1","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"volume-title":"Codexglue: A machine learning benchmark dataset for code understanding and generation. arXiv preprint arXiv:2102.04664","year":"2021","author":"Lu Shuai","key":"e_1_3_2_1_17_1","unstructured":"Shuai Lu, Daya Guo, Shuo Ren, Junjie Huang, Alexey Svyatkovskiy, Ambrosio Blanco, Colin Clement, Dawn Drain, Daxin Jiang, Duyu Tang, 2021. Codexglue: A machine learning benchmark dataset for code understanding and generation. arXiv preprint arXiv:2102.04664 (2021)."},{"volume-title":"USENIX Security","year":"2023","author":"Mirsky Yisroel","key":"e_1_3_2_1_18_1","unstructured":"Yisroel Mirsky, George Macon, Michael Brown, Carter Yagemann, Matthew Pruett, Evan Downing, Sukarno Mertoguno, and Wenke Lee. 2023. VulChecker: Graph-based Vulnerability Localization in Source Code. In USENIX Security 2023."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3468264.3473122"},{"volume-title":"Last accessed on","year":"2023","author":"National\u00a0Institute of Standards and Technology.","key":"e_1_3_2_1_20_1","unstructured":"National\u00a0Institute of Standards and Technology. Last accessed on March 19, 2023. National Vulnerability Database. https:\/\/nvd.nist.gov\/"},{"volume-title":"Last accessed on","year":"2023","author":"National\u00a0Institute of Standards and Technology.","key":"e_1_3_2_1_21_1","unstructured":"National\u00a0Institute of Standards and Technology. Last accessed on March 19, 2023. NIST Software Assurance Reference Dataset. https:\/\/samate.nist.gov\/SARD"},{"key":"e_1_3_2_1_22_1","first-page":"297","article-title":"Report on the static analysis tool exposition (sate) iv","volume":"500","author":"Okun Vadim","year":"2013","unstructured":"Vadim Okun, Aurelien Delaitre, Paul\u00a0E Black, 2013. Report on the static analysis tool exposition (sate) iv. NIST Special Publication 500 (2013), 297.","journal-title":"NIST Special Publication"},{"volume-title":"Language models are unsupervised multitask learners. OpenAI blog 1, 8","year":"2019","author":"Radford Alec","key":"e_1_3_2_1_23_1","unstructured":"Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, Ilya Sutskever, 2019. Language models are unsupervised multitask learners. OpenAI blog 1, 8 (2019), 9."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2018.00120"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00188"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3564625.3567985"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/DSN48987.2021.00030"},{"volume-title":"Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. arXiv preprint arXiv:2109.00859","year":"2021","author":"Wang Yue","key":"e_1_3_2_1_29_1","unstructured":"Yue Wang, Weishi Wang, Shafiq Joty, and Steven\u00a0CH Hoi. 2021. Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. arXiv preprint arXiv:2109.00859 (2021)."},{"volume-title":"A Systematic Evaluation of Large Language Models of Code. arXiv preprint arXiv:2202.13169","year":"2022","author":"Xu F","key":"e_1_3_2_1_30_1","unstructured":"Frank\u00a0F Xu, Uri Alon, Graham Neubig, and Vincent\u00a0J Hellendoorn. 2022. A Systematic Evaluation of Large Language Models of Code. arXiv preprint arXiv:2202.13169 (2022)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2014.44"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-SEIP52600.2021.00020"},{"volume-title":"Devign: Effective vulnerability identification by learning comprehensive program semantics via graph neural networks. Advances in neural information processing systems 32","year":"2019","author":"Zhou Yaqin","key":"e_1_3_2_1_33_1","unstructured":"Yaqin Zhou, Shangqing Liu, Jingkai Siow, Xiaoning Du, and Yang Liu. 2019. Devign: Effective vulnerability identification by learning comprehensive program semantics via graph neural networks. Advances in neural information processing systems 32 (2019)."}],"event":{"name":"RAID 2023: The 26th International Symposium on Research in Attacks, Intrusions and Defenses","acronym":"RAID 2023","location":"Hong Kong China"},"container-title":["Proceedings of the 26th International Symposium on Research in Attacks, Intrusions and Defenses"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3607199.3607242","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,16]],"date-time":"2024-10-16T10:27:53Z","timestamp":1729074473000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3607199.3607242"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,16]]},"references-count":33,"alternative-id":["10.1145\/3607199.3607242","10.1145\/3607199"],"URL":"https:\/\/doi.org\/10.1145\/3607199.3607242","relation":{},"subject":[],"published":{"date-parts":[[2023,10,16]]},"assertion":[{"value":"2023-10-16","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}