{"id":"https://openalex.org/W4391376577","doi":"https://doi.org/10.48550/arxiv.2401.15897","title":"Red-Teaming for Generative AI: Silver Bullet or Security Theater?","display_name":"Red-Teaming for Generative AI: Silver Bullet or Security Theater?","publication_year":2024,"publication_date":"2024-01-29","ids":{"openalex":"https://openalex.org/W4391376577","doi":"https://doi.org/10.48550/arxiv.2401.15897"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.15897","pdf_url":"http://arxiv.org/pdf/2401.15897","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2401.15897","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074082280","display_name":"Michael Feffer","orcid":"https://orcid.org/0000-0002-5243-472X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feffer, Michael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093826591","display_name":"Anusha Sinha","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sinha, Anusha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029448258","display_name":"Zachary C. Lipton","orcid":"https://orcid.org/0000-0002-3824-4241"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lipton, Zachary C.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5037735812","display_name":"Hoda Heidari","orcid":"https://orcid.org/0000-0003-3710-4076"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heidari, Hoda","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":77},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.3345,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.3345,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/silver-bullet","display_name":"Silver bullet","score":0.95097697}],"concepts":[{"id":"https://openalex.org/C2776088982","wikidata":"https://www.wikidata.org/wiki/Q841402","display_name":"Silver bullet","level":2,"score":0.95097697},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.541052},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.37352026},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.34712788},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.26811984},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.15721115},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.15897","pdf_url":"http://arxiv.org/pdf/2401.15897","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.15897","pdf_url":"http://arxiv.org/pdf/2401.15897","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2539773025","https://openalex.org/W2390279801","https://openalex.org/W2382290278","https://openalex.org/W2380075625","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2075472017","https://openalex.org/W2012977356","https://openalex.org/W2001405890"],"abstract_inverted_index":{"In":[0,82,170],"response":[1],"to":[2,22,70,106,238],"rising":[3],"concerns":[4],"surrounding":[5],"the":[6,78,93,102,108,133,136,142,146,150,160,198],"safety,":[7],"security,":[8],"and":[9,17,34,47,65,96,111,123,157,159,168,193,240],"trustworthiness":[10],"of":[11,29,80,89,101,125,135,172,191,200,224],"Generative":[12],"AI":[13,23,40,94,114,126,243],"(GenAI)":[14],"models,":[15,202],"practitioners":[16],"regulators":[18],"alike":[19],"have":[20],"pointed":[21],"red-teaming":[24,72,90,115,127,179,205,244],"as":[25,74,206],"a":[26,182,188,207,220,234],"key":[27],"component":[28],"their":[30],"strategies":[31],"for":[32,113,186,209,226],"identifying":[33],"mitigating":[35],"these":[36],"risks.":[37],"However,":[38],"despite":[39],"red-teaming's":[41],"central":[42],"role":[43,59],"in":[44,63,77,92,148],"policy":[45],"discussions":[46],"corporate":[48],"messaging,":[49],"significant":[50],"questions":[51],"remain":[52],"about":[53],"what":[54,58],"precisely":[55,67],"it":[56,60,68,163],"means,":[57],"can":[61],"play":[62],"regulation,":[64],"how":[66],"relates":[69],"conventional":[71],"practices":[73,124],"originally":[75],"conceived":[76],"field":[79],"cybersecurity.":[81],"this":[83],"work,":[84],"we":[85,175,229],"identify":[86],"recent":[87],"cases":[88],"activities":[91,192],"industry":[95],"conduct":[97],"an":[98],"extensive":[99],"survey":[100],"relevant":[103],"research":[104],"literature":[105],"characterize":[107],"scope,":[109],"structure,":[110],"criteria":[112],"practices.":[116,245],"Our":[117],"analysis":[118],"reveals":[119],"that":[120,177],"prior":[121],"methods":[122],"diverge":[128],"along":[129],"several":[130],"axes,":[131],"including":[132],"purpose":[134],"activity":[137,151],"(which":[138],"is":[139,152],"often":[140],"vague),":[141],"artifact":[143],"under":[144],"evaluation,":[145],"setting":[147],"which":[149],"conducted":[153],"(e.g.,":[154,165],"actors,":[155],"resources,":[156],"methods),":[158],"resulting":[161],"decisions":[162],"informs":[164],"reporting,":[166],"disclosure,":[167],"mitigation).":[169],"light":[171],"our":[173,231],"findings,":[174],"argue":[176],"while":[178],"may":[180],"be":[181],"valuable":[183],"big-tent":[184],"idea":[185],"characterizing":[187],"broad":[189],"set":[190],"attitudes":[194],"aimed":[195],"at":[196],"improving":[197],"behavior":[199],"GenAI":[201],"gestures":[203],"towards":[204],"panacea":[208],"every":[210],"possible":[211],"risk":[212],"verge":[213],"on":[214],"security":[215],"theater.":[216],"To":[217],"move":[218],"toward":[219],"more":[221],"robust":[222],"toolbox":[223],"evaluations":[225],"generative":[227],"AI,":[228],"synthesize":[230],"recommendations":[232],"into":[233],"question":[235],"bank":[236],"meant":[237],"guide":[239],"scaffold":[241],"future":[242]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4391376577","counts_by_year":[],"updated_date":"2025-04-17T13:46:58.653031","created_date":"2024-01-31"}