{"id":"https://openalex.org/W4403182579","doi":"https://doi.org/10.1109/is262782.2024.10704095","title":"Deepfake Audio Detection Using Spectrogram-based Feature and Ensemble of Deep Learning Models","display_name":"Deepfake Audio Detection Using Spectrogram-based Feature and Ensemble of Deep Learning Models","publication_year":2024,"publication_date":"2024-09-30","ids":{"openalex":"https://openalex.org/W4403182579","doi":"https://doi.org/10.1109/is262782.2024.10704095"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/is262782.2024.10704095","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2407.01777","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111250608","display_name":"Lam Pham","orcid":null},"institutions":[{"id":"https://openalex.org/I132118926","display_name":"Austrian Institute of Technology","ror":"https://ror.org/04knbh022","country_code":"AT","type":"funder","lineage":["https://openalex.org/I132118926"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Lam Pham","raw_affiliation_strings":["Austrian Institute of Technology, Vienna, Austria"],"affiliations":[{"raw_affiliation_string":"Austrian Institute of Technology, Vienna, Austria","institution_ids":["https://openalex.org/I132118926"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056967532","display_name":"P. C. B. Lam","orcid":"https://orcid.org/0009-0003-5105-5976"},"institutions":[{"id":"https://openalex.org/I47265099","display_name":"Ho Chi Minh City University of Technology","ror":"https://ror.org/04qva2324","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023","https://openalex.org/I47265099"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Phat Lam","raw_affiliation_strings":["HCM University of Technology, Ho Chi Minh city, Vietnam"],"affiliations":[{"raw_affiliation_string":"HCM University of Technology, Ho Chi Minh city, Vietnam","institution_ids":["https://openalex.org/I47265099"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101495770","display_name":"Truong Thanh Nguyen","orcid":"https://orcid.org/0000-0003-3139-4105"},"institutions":[{"id":"https://openalex.org/I47265099","display_name":"Ho Chi Minh City University of Technology","ror":"https://ror.org/04qva2324","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023","https://openalex.org/I47265099"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Truong Nguyen","raw_affiliation_strings":["HCM University of Technology, Ho Chi Minh city, Vietnam"],"affiliations":[{"raw_affiliation_string":"HCM University of Technology, Ho Chi Minh city, Vietnam","institution_ids":["https://openalex.org/I47265099"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100912676","display_name":"Huyen Nguyen","orcid":null},"institutions":[{"id":"https://openalex.org/I92614990","display_name":"Tokyo University of Agriculture and Technology","ror":"https://ror.org/00qg0kr10","country_code":"JP","type":"funder","lineage":["https://openalex.org/I92614990"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Huyen Nguyen","raw_affiliation_strings":["Tokyo University of Agriculture and Technology, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Tokyo University of Agriculture and Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I92614990"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102746568","display_name":"Alexander Schindler","orcid":"https://orcid.org/0000-0001-6058-7753"},"institutions":[{"id":"https://openalex.org/I132118926","display_name":"Austrian Institute of Technology","ror":"https://ror.org/04knbh022","country_code":"AT","type":"funder","lineage":["https://openalex.org/I132118926"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Alexander Schindler","raw_affiliation_strings":["Austrian Institute of Technology, Vienna, Austria"],"affiliations":[{"raw_affiliation_string":"Austrian Institute of Technology, Vienna, Austria","institution_ids":["https://openalex.org/I132118926"]}]}],"institution_assertions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.547,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.796846,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":92,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9142,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9142,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.97273266},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.67416507},{"id":"https://openalex.org/keywords/ensemble-learning","display_name":"Ensemble Learning","score":0.51865524}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.97273266},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.73704934},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.67416507},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5673053},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.561002},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.51865524},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.43268389},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.42919385},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40205666},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/is262782.2024.10704095","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.01777","pdf_url":"http://arxiv.org/pdf/2407.01777","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.01777","pdf_url":"http://arxiv.org/pdf/2407.01777","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":19,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2120847449","https://openalex.org/W2123299109","https://openalex.org/W2401839215","https://openalex.org/W2967606780","https://openalex.org/W2972526452","https://openalex.org/W3032636897","https://openalex.org/W3033711348","https://openalex.org/W3167533889","https://openalex.org/W4221138880","https://openalex.org/W4309049624","https://openalex.org/W4311681683","https://openalex.org/W4319977731","https://openalex.org/W4323519173","https://openalex.org/W4381854917","https://openalex.org/W4385822353","https://openalex.org/W4385822356","https://openalex.org/W4386302282","https://openalex.org/W4394019163"],"related_works":["https://openalex.org/W4402568167","https://openalex.org/W4375868962","https://openalex.org/W4308112567","https://openalex.org/W3179495260","https://openalex.org/W3162132941","https://openalex.org/W3124943098","https://openalex.org/W2530685530","https://openalex.org/W2088854863","https://openalex.org/W2011227383","https://openalex.org/W1976719989"],"abstract_inverted_index":{"In":[0,44,161],"this":[1],"paper,":[2],"we":[3,91,165],"propose":[4],"a":[5,18,93,195],"deep-learning-based":[6],"system":[7],"for":[8,24],"the":[9,21,46,89,111,131,137,162,167,184,188,219,252,260,274],"task":[10,275],"of":[11,20,60,77,96,118,172,242,262,276],"deepfake":[12,278],"audio":[13,49,169,180,189,205,277],"detection.":[14,279],"This":[15],"work":[16],"is":[17,35,50,108,134,245],"part":[19],"proposed":[22,115,225],"toolchain":[23],"speech":[25],"analysis":[26],"in":[27,42,251],"EUCINF":[28],"(EUropean":[29],"Cyber":[30],"and":[31,83,158,176,265],"INFormation)":[32],"project,":[33],"which":[34,244],"an":[36,237],"European":[37],"project":[38],"with":[39,72],"multiple":[40],"partners":[41],"Europe.":[43],"particular,":[45],"raw":[47],"input":[48,185],"first":[51,106],"transformed":[52],"into":[53],"various":[54],"spectrograms":[55,112,264],"using":[56,113],"three":[57,101],"transformation":[58],"methods":[59],"Short-time":[61],"Fourier":[62],"Transform":[63,66,69],"(STFT),":[64],"Constant-Q":[65],"(CQT),":[67],"Wavelet":[68],"(WT)":[70],"combined":[71],"different":[73],"auditory-":[74],"based":[75,99],"filters":[76,81],"Mel,":[78],"Gammatone,":[79],"linear":[80],"(LF),":[82],"discrete":[84],"cosine":[85],"transform":[86],"(DCT).":[87],"Given":[88],"spectrograms,":[90],"evaluate":[92],"wide":[94],"range":[95],"classification":[97],"models":[98,117,143,171,211,226],"on":[100,227,273],"deep":[102,209,266],"learning":[103,139,210,267],"approaches.":[104],"The":[105],"approach":[107,133],"to":[109,135,178,200,217,248,269],"train":[110],"our":[114,224],"baseline":[116],"CNN-based":[119],"model":[120,124,127,199,235,271],"(CNN-":[121],"baseline),":[122],"RNN-based":[123],"(RNN-baseline),":[125],"C-RNN":[126],"(C-RNN":[128],"baseline).":[129],"Meanwhile,":[130],"second":[132],"apply":[136],"transfer":[138],"from":[140,183,212],"computer":[141],"vision":[142],"such":[144],"as":[145],"ResNet-":[146],"18,":[147],"MobileNet-V3,":[148],"EfficientNet-BO,":[149],"DenseNet-121,":[150],"SuffleNet-":[151],"V2,":[152],"Swint,":[153],"Convnext-":[154],"Tiny,":[155],"GoogLeNet,":[156],"MNASsnet,":[157],"Reg-":[159],"Net.":[160],"third":[163],"approach,":[164],"leverage":[166],"state-of-the-art":[168],"pre-trained":[170],"Whisper,":[173],"Seamless,":[174],"Speechbrain,":[175],"Pyannote":[177],"extract":[179],"embed":[181,190],"dings":[182,191],"spectrograms.":[186],"Then,":[187],"are":[192,215],"explored":[193],"by":[194],"Multilayer":[196],"perceptron":[197],"(MLP)":[198],"detect":[201],"fake":[202],"or":[203],"real":[204],"samples.":[206],"Finally,":[207],"high-performance":[208],"these":[213],"approaches":[214,268],"fused":[216],"achieve":[218],"best":[220,233],"performance.":[221],"We":[222],"evaluated":[223],"ASVspoof":[228],"2019":[229,254],"benchmark":[230],"dataset.":[231],"Our":[232],"ensemble":[234],"achieved":[236],"Equal":[238],"Error":[239],"Rate":[240],"(EER)":[241],"0.03,":[243],"highly":[246],"competitive":[247],"top-performing":[249],"systems":[250],"ASVspoofing":[253],"challenge.":[255],"Experimental":[256],"results":[257],"also":[258],"highlight":[259],"potential":[261],"selective":[263],"enhance":[270],"performance":[272]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403182579","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-04-25T22:16:38.340832","created_date":"2024-10-08"}