{"id":"https://openalex.org/W4403781069","doi":"https://doi.org/10.48550/arxiv.2409.15180","title":"A Comprehensive Survey with Critical Analysis for Deepfake Speech\n Detection","display_name":"A Comprehensive Survey with Critical Analysis for Deepfake Speech\n Detection","publication_year":2024,"publication_date":"2024-09-23","ids":{"openalex":"https://openalex.org/W4403781069","doi":"https://doi.org/10.48550/arxiv.2409.15180"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.15180","pdf_url":"http://arxiv.org/pdf/2409.15180","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2409.15180","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044671183","display_name":"Lam Pham","orcid":"https://orcid.org/0000-0001-8031-7777"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pham, Lam","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056967532","display_name":"P. C. B. Lam","orcid":"https://orcid.org/0009-0003-5105-5976"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lam, Phat","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113084734","display_name":"Tin Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Tin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082199118","display_name":"Hieu Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Hieu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100912676","display_name":"Huyen Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Huyen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102746568","display_name":"Alexander Schindler","orcid":"https://orcid.org/0000-0001-6058-7753"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schindler, Alexander","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5060392503","display_name":"Canh Vu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vu, Canh","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":82},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9584,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9584,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9502,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/survey-research","display_name":"Survey research","score":0.41971138}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47686514},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.42932165},{"id":"https://openalex.org/C173481278","wikidata":"https://www.wikidata.org/wiki/Q7257997","display_name":"Survey research","level":2,"score":0.41971138},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36295843},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2683529},{"id":"https://openalex.org/C75630572","wikidata":"https://www.wikidata.org/wiki/Q538904","display_name":"Applied psychology","level":1,"score":0.12502906}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.15180","pdf_url":"http://arxiv.org/pdf/2409.15180","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.15180","pdf_url":"http://arxiv.org/pdf/2409.15180","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4396701345","https://openalex.org/W4396696052","https://openalex.org/W4391913857","https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Thanks":[0],"to":[1,59,74,112,116,133,174,195,212],"advancements":[2],"in":[3,26,89,147,178],"deep":[4,192],"learning,":[5],"speech":[6,22,30,40,65],"generation":[7],"systems":[8,35],"now":[9],"power":[10],"a":[11,118,126,135,139,206,218],"variety":[12],"of":[13,142,160,199,225],"real-world":[14],"applications,":[15],"such":[16],"as":[17,75],"text-to-speech":[18],"for":[19,51,62,99,105,222,244],"individuals":[20],"with":[21],"disorders,":[23],"voice":[24],"chatbots":[25],"call":[27],"centers,":[28],"cross-linguistic":[29],"translation,":[31],"etc.":[32],"While":[33],"these":[34,214],"can":[36],"autonomously":[37],"generate":[38],"human-like":[39],"and":[41,145,166,189,216,232,240],"replicate":[42],"specific":[43,191],"voices,":[44],"they":[45],"also":[46],"pose":[47],"risks":[48],"when":[49],"misused":[50],"malicious":[52],"purposes.":[53],"This":[54,129],"motivates":[55],"the":[56,76,82,106,143,167,179,197,223,230,233,245],"research":[57,242],"community":[58],"develop":[60],"models":[61],"detecting":[63],"synthesized":[64],"(e.g.,":[66],"fake":[67],"speech)":[68],"generated":[69],"by":[70],"deep-learning-based":[71],"models,":[72],"referred":[73],"Deepfake":[77,83,107,119,148,200,226,246],"Speech":[78,84,108,120,149,201,227,247],"Detection":[79,85,109,121,202,248],"task.":[80,101,249],"As":[81],"task":[86,110,224],"has":[87],"emerged":[88],"recent":[90],"years,":[91],"there":[92],"are":[93],"not":[94],"many":[95],"survey":[96,152],"papers":[97],"proposed":[98],"this":[100],"Additionally,":[102],"existing":[103,176],"surveys":[104],"tend":[111],"summarize":[113],"techniques":[114,169,194],"used":[115],"construct":[117],"system":[122],"rather":[123],"than":[124],"providing":[125,138],"thorough":[127],"analysis.":[128],"gap":[130],"motivated":[131],"us":[132],"conduct":[134],"comprehensive":[136],"survey,":[137,207],"critical":[140],"analysis":[141,159,231],"challenges":[144,177],"developments":[146],"Detection.":[150,228],"Our":[151],"is":[153],"innovatively":[154],"structured,":[155],"offering":[156],"an":[157],"in-depth":[158],"current":[161],"challenge":[162],"competitions,":[163],"public":[164],"datasets,":[165],"deep-learning":[168],"that":[170],"provide":[171],"enhanced":[172],"solutions":[173],"address":[175],"field.":[180],"From":[181],"our":[182],"analysis,":[183],"we":[184,208,236],"propose":[185,217],"hypotheses":[186,215],"on":[187],"leveraging":[188],"combining":[190],"learning":[193],"improve":[196],"effectiveness":[198],"systems.":[203],"Beyond":[204],"conducting":[205],"perform":[209],"extensive":[210],"experiments":[211],"validate":[213],"highly":[219],"competitive":[220],"model":[221],"Given":[229],"experimental":[234],"results,":[235],"finally":[237],"indicate":[238],"potential":[239],"promising":[241],"directions":[243]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403781069","counts_by_year":[],"updated_date":"2025-01-22T16:40:31.442619","created_date":"2024-10-26"}