{"id":"https://openalex.org/W4402466902","doi":"https://doi.org/10.48550/arxiv.2407.18517","title":"SLIM: Style-Linguistics Mismatch Model for Generalized Audio Deepfake\n Detection","display_name":"SLIM: Style-Linguistics Mismatch Model for Generalized Audio Deepfake\n Detection","publication_year":2024,"publication_date":"2024-07-26","ids":{"openalex":"https://openalex.org/W4402466902","doi":"https://doi.org/10.48550/arxiv.2407.18517"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.18517","pdf_url":"http://arxiv.org/pdf/2407.18517","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2407.18517","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019191351","display_name":"Yi Zhu","orcid":"https://orcid.org/0000-0002-6482-6712"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Yi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092639627","display_name":"Surya Koppisetti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Koppisetti, Surya","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020713738","display_name":"Trang Tran","orcid":"https://orcid.org/0009-0009-1116-4442"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tran, Trang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5036255713","display_name":"Gaurav Bharaj","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bharaj, Gaurav","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9892,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9892,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.962,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9034,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C2776445246","wikidata":"https://www.wikidata.org/wiki/Q1792644","display_name":"Style (visual arts)","level":2,"score":0.73038286},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.53847605},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.48640254},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4102331},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.14075756},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.12183434},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.064569265}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.18517","pdf_url":"http://arxiv.org/pdf/2407.18517","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.18517","pdf_url":"http://arxiv.org/pdf/2407.18517","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4396701345","https://openalex.org/W4396696052","https://openalex.org/W4395014643","https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2382290278","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Audio":[0],"deepfake":[1],"detection":[2],"(ADD)":[3],"is":[4],"crucial":[5],"to":[6,74,89,113,150],"combat":[7],"the":[8,35,67,91,95,118,124,152,167],"misuse":[9],"of":[10,38,166],"speech":[11,73],"synthesized":[12],"from":[13,21,77],"generative":[14],"AI":[15],"models.":[16],"Existing":[17],"ADD":[18,62],"models":[19,40],"suffer":[20],"generalization":[22],"issues,":[23,57],"with":[24,106],"a":[25,60,115,160],"large":[26],"performance":[27],"discrepancy":[28],"between":[29,154],"in-domain":[30,141],"and":[31,120,156],"out-of-domain":[32,134],"data.":[33,142],"Moreover,":[34],"black-box":[36],"nature":[37],"existing":[39],"limits":[41],"their":[42],"use":[43],"in":[44,71,94,104,159],"real-world":[45],"scenarios,":[46],"where":[47],"explanations":[48],"are":[49,101,127],"required":[50],"for":[51],"model":[52,63,168],"decisions.":[53],"To":[54],"alleviate":[55],"these":[56],"we":[58],"introduce":[59],"new":[61],"that":[64],"explicitly":[65],"uses":[66],"StyleLInguistics":[68],"Mismatch":[69],"(SLIM)":[70],"fake":[72,121],"separate":[75],"them":[76],"real":[78,87,96,119],"speech.":[79],"SLIM":[80,129,147],"first":[81],"employs":[82],"self-supervised":[83],"pretraining":[84],"on":[85,117,133,140],"only":[86],"samples":[88],"learn":[90,114],"style-linguistics":[92],"dependency":[93],"class.":[97],"The":[98,143],"learned":[99,145],"features":[100,110,144],"then":[102],"used":[103],"complement":[105],"standard":[107],"pretrained":[108],"acoustic":[109],"(e.g.,":[111],"Wav2vec)":[112],"classifier":[116],"classes.":[122],"When":[123],"feature":[125],"encoders":[126],"frozen,":[128],"outperforms":[130],"benchmark":[131],"methods":[132],"datasets":[135],"while":[136],"achieving":[137],"competitive":[138],"results":[139],"by":[146],"allow":[148],"us":[149],"quantify":[151],"(mis)match":[153],"style":[155],"linguistic":[157],"content":[158],"sample,":[161],"hence":[162],"facilitating":[163],"an":[164],"explanation":[165],"decision.":[169]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4402466902","counts_by_year":[],"updated_date":"2024-12-15T13:59:33.250809","created_date":"2024-09-12"}