{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T11:13:10Z","timestamp":1730200390638,"version":"3.28.0"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,12,15]],"date-time":"2023-12-15T00:00:00Z","timestamp":1702598400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,15]],"date-time":"2023-12-15T00:00:00Z","timestamp":1702598400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,12,15]]},"DOI":"10.1109\/bigdata59044.2023.10386735","type":"proceedings-article","created":{"date-parts":[[2024,1,22]],"date-time":"2024-01-22T18:28:47Z","timestamp":1705948127000},"page":"4854-4862","source":"Crossref","is-referenced-by-count":0,"title":["Toward training NLP models to take into account privacy leakages"],"prefix":"10.1109","author":[{"given":"Gaspard","family":"Berthelier","sequence":"first","affiliation":[{"name":"Université Paris Saclay,CentraleSupélec,Grenoble,France"}]},{"given":"Antoine","family":"Boutet","sequence":"additional","affiliation":[{"name":"Univ Lyon, INSA Lyon, Inria, CITI,Lyon,France"}]},{"given":"Antoine","family":"Richard","sequence":"additional","affiliation":[{"name":"Hospices Civil de Lyon,DSN Bron,Grenoble,France"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData55660.2022.10020513"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/BigData55660.2022.10020685"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/BigData55660.2022.10020569"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3077350"},{"article-title":"Deidentification of free-text medical records using pre-trained bidirectional transformers","year":"2020","author":"J","key":"ref5"},{"article-title":"Development and validation of a natural language processing algorithm to pseudonymize documents in the context of a clinical data warehouse","year":"2023","author":"Tannier","key":"ref6"},{"key":"ref7","article-title":"Anonymisation de documents m\u00e9dicaux en texte libre et en fran\u00e7ais via r\u00e9seaux de neurones","author":"Richard","year":"2023","journal-title":"Plate-forme Intelligence Artificielle 2023 (PFIA2023)-Journ\u00e9e Sant\u00e9 & IA"},{"article-title":"Learning structures of the french clinical language:development and validation of word embedding models using 21 million clinical reports from electronic health records","year":"2022","author":"Dura","key":"ref8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533088"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.73"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3045078"},{"key":"ref12","article-title":"Counterfactual memorization in neural language models","volume":"abs\/2112.12938","author":"Zhang","year":"2021","journal-title":"CoRR"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.56553\/popets-2023-0070"},{"article-title":"Dataset reconstruction attack against language models","year":"2021","author":"Panchendrarajan","key":"ref14"},{"article-title":"Membership inference attack susceptibility of clinical language models","year":"2021","author":"Jagannatha","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.570"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/BigData55660.2022.10020711"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/2976749.2978318"},{"key":"ref19","article-title":"Attention is all you need","volume":"abs\/1706.03762","author":"Vaswani","year":"2017","journal-title":"CoRR"},{"article-title":"The illustrated transformer","year":"2018","author":"Alammar","key":"ref20"},{"article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","year":"2019","author":"Devlin","key":"ref21"},{"year":"2023","key":"ref22","article-title":"Gpt-4 technical report"},{"key":"ref23","article-title":"Reinforcement learning for mitigating toxicity in neural dialogue systems","volume-title":"Ph.D. dissertation","author":"Faal","year":"2022"},{"article-title":"Roberta: A robustly optimized bert pretraining approach","year":"2019","author":"Liu","key":"ref24"},{"article-title":"Distilbert, a distilled version of bert: smaller, faster, cheaper and lighter","year":"2020","author":"Sanh","key":"ref25"},{"key":"ref26","article-title":"Camembert: a tasty french language model","volume":"abs\/1911.03894","author":"Martin","year":"2019","journal-title":"CoRR"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICHI.2019.8904544"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1186\/s12911-020-1026-2"},{"article-title":"De-identification of french unstructured clinical notes for machine learning tasks","year":"2022","author":"Tchouka","key":"ref29"},{"key":"ref30","article-title":"Extracting training data from large language models","volume":"abs\/2012.07805","author":"Carlini","year":"2020","journal-title":"CoRR"},{"article-title":"Quantifying memorization across neural language models","year":"2023","author":"Carlini","key":"ref31"},{"key":"ref32","article-title":"Membership inference attacks against machine learning models","volume":"abs\/1610.05820","author":"Shokri","year":"2016","journal-title":"CoRR"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W19-5006"},{"author":"Face","key":"ref34","article-title":"The platform where the machine learning community collaborates on models, datasets, and applications"}],"event":{"name":"2023 IEEE International Conference on Big Data (BigData)","start":{"date-parts":[[2023,12,15]]},"location":"Sorrento, Italy","end":{"date-parts":[[2023,12,18]]}},"container-title":["2023 IEEE International Conference on Big Data (BigData)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10385234\/10386078\/10386735.pdf?arnumber=10386735","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T01:46:06Z","timestamp":1706751966000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10386735\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,15]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/bigdata59044.2023.10386735","relation":{},"subject":[],"published":{"date-parts":[[2023,12,15]]}}}