{"id":"https://openalex.org/W4398131482","doi":"https://doi.org/10.1145/3643833.3656133","title":"From Sound to Sight: Audio-Visual Fusion and Deep Learning for Drone Detection","display_name":"From Sound to Sight: Audio-Visual Fusion and Deep Learning for Drone Detection","publication_year":2024,"publication_date":"2024-05-20","ids":{"openalex":"https://openalex.org/W4398131482","doi":"https://doi.org/10.1145/3643833.3656133"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3643833.3656133","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5098709742","display_name":"Ildi Alla","orcid":"https://orcid.org/0009-0008-6290-6998"},"institutions":[{"id":"https://openalex.org/I4210138412","display_name":"Centre de recherche Inria Lille - Nord Europe","ror":"https://ror.org/04eej9726","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1326498283","https://openalex.org/I4210138412"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Ildi Alla","raw_affiliation_strings":["Inria Lille-Nord Europe, Lille, France"],"affiliations":[{"raw_affiliation_string":"Inria Lille-Nord Europe, Lille, France","institution_ids":["https://openalex.org/I4210138412"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021042780","display_name":"Herv\u00e9 B. Olou","orcid":"https://orcid.org/0000-0002-2607-2809"},"institutions":[{"id":"https://openalex.org/I4210119417","display_name":"Universit\u00e9 d'Abomey-Calavi","ror":"https://ror.org/03gzr6j88","country_code":"BJ","type":"education","lineage":["https://openalex.org/I4210119417"]}],"countries":["BJ"],"is_corresponding":false,"raw_author_name":"Herv\u00e9 B. Olou","raw_affiliation_strings":["University of Abomey-Calavi, Porto-Novo, Benin"],"affiliations":[{"raw_affiliation_string":"University of Abomey-Calavi, Porto-Novo, Benin","institution_ids":["https://openalex.org/I4210119417"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044507011","display_name":"Valeria Loscr\u00ec","orcid":"https://orcid.org/0000-0003-2558-1801"},"institutions":[{"id":"https://openalex.org/I4210138412","display_name":"Centre de recherche Inria Lille - Nord Europe","ror":"https://ror.org/04eej9726","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1326498283","https://openalex.org/I4210138412"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Valeria Loscri","raw_affiliation_strings":["Inria Lille-Nord Europe, Lille, France"],"affiliations":[{"raw_affiliation_string":"Inria Lille-Nord Europe, Lille, France","institution_ids":["https://openalex.org/I4210138412"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051196445","display_name":"Marco Levorato","orcid":"https://orcid.org/0000-0002-6920-4189"},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]},{"id":"https://openalex.org/I4210140791","display_name":"Irvine University","ror":"https://ror.org/04ysmca02","country_code":"US","type":"education","lineage":["https://openalex.org/I4210140791"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marco Levorato","raw_affiliation_strings":["UC Irvine - University of California [Irvine] (Irvine, CA 92697 - United States)"],"affiliations":[{"raw_affiliation_string":"UC Irvine - University of California [Irvine] (Irvine, CA 92697 - United States)","institution_ids":["https://openalex.org/I204250578","https://openalex.org/I4210140791"]}]}],"institution_assertions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":"123","last_page":"133"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9952,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11665","display_name":"Animal Vocal Communication and Behavior","score":0.9925,"subfield":{"id":"https://openalex.org/subfields/1309","display_name":"Developmental Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/drone","display_name":"Drone","score":0.88546383},{"id":"https://openalex.org/keywords/sight","display_name":"Sight","score":0.71019274},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.62285435}],"concepts":[{"id":"https://openalex.org/C59519942","wikidata":"https://www.wikidata.org/wiki/Q650665","display_name":"Drone","level":2,"score":0.88546383},{"id":"https://openalex.org/C1517167","wikidata":"https://www.wikidata.org/wiki/Q1134322","display_name":"Sight","level":2,"score":0.71019274},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.70849687},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.62285435},{"id":"https://openalex.org/C203718221","wikidata":"https://www.wikidata.org/wiki/Q491713","display_name":"Sound (geography)","level":2,"score":0.50485784},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4981382},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.47501954},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3512235},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.2673701},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.16987205},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3643833.3656133","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://hal.science/hal-04532239","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":23,"referenced_works":["https://openalex.org/W2144528190","https://openalex.org/W2191779130","https://openalex.org/W2488524531","https://openalex.org/W2581387731","https://openalex.org/W2607172015","https://openalex.org/W2625718276","https://openalex.org/W2901889330","https://openalex.org/W2917876423","https://openalex.org/W2931364255","https://openalex.org/W2944086006","https://openalex.org/W2996759432","https://openalex.org/W2997647433","https://openalex.org/W3022215617","https://openalex.org/W3041916388","https://openalex.org/W3081923410","https://openalex.org/W3136561353","https://openalex.org/W3192436120","https://openalex.org/W3199621185","https://openalex.org/W3210343736","https://openalex.org/W4281996560","https://openalex.org/W4312229174","https://openalex.org/W4319796192","https://openalex.org/W4323240792"],"related_works":["https://openalex.org/W4386036939","https://openalex.org/W4379143281","https://openalex.org/W4327774218","https://openalex.org/W4312858960","https://openalex.org/W4312089944","https://openalex.org/W4247925126","https://openalex.org/W4229448053","https://openalex.org/W3200286695","https://openalex.org/W2605096541","https://openalex.org/W2059768187"],"abstract_inverted_index":{"The":[0,23],"proliferation":[1],"of":[2,11,19,44,173,187],"airborne":[3],"drones,":[4],"while":[5],"instrumental":[6],"to":[7,15,37,94,144],"a":[8,79,110,135],"broad":[9],"range":[10],"applications,":[12],"has":[13],"led":[14],"an":[16],"increased":[17],"number":[18],"regulatory":[20],"non-compliance":[21],"incidents.":[22],"ubiquitous":[24],"unmanned":[25],"aerial":[26],"vehicles":[27],"(UAVs)":[28],"are":[29,54],"posing":[30],"security":[31],"risks,":[32],"since":[33],"they":[34,61],"have":[35],"started":[36],"be":[38],"used":[39],"for":[40,84,232],"cybercrimes.":[41],"Effective":[42],"detection":[43,70,86],"illicit":[45],"drones":[46,53,97],"in":[47,195,216,225],"restricted":[48],"areas":[49],"is":[50],"paramount.":[51],"Evolved":[52],"more":[55,57],"and":[56,59,91,98,118,125,150,162,169],"sophisticated,":[58],"sometimes":[60],"do":[62],"not":[63],"emit":[64],"RF-based":[65,69],"signals,":[66],"making":[67,229],"inapplicable":[68],"solutions.":[71],"Different":[72],"from":[73,101],"existing":[74],"work,":[75],"this":[76],"paper":[77],"introduces":[78],"neural":[80],"sensor":[81],"fusion":[82,112,190,207],"framework":[83],"drone":[85,210],"based":[87],"on":[88,122],"both":[89],"audio":[90,154],"video":[92],"data":[93,133],"accurately":[95],"identify":[96],"differentiate":[99],"them":[100],"similar":[102],"objects":[103],"at":[104],"long":[105],"distances.":[106],"Our":[107,202],"design":[108],"adopts":[109],"late":[111],"approach":[113],"using":[114,134],"the":[115,123,153,171,174,185],"Weighted":[116],"Average":[117],"Random":[119],"Forest":[120],"algorithm":[121],"visual":[124],"auditory":[126],"classification":[127],"pipeline.":[128],"Specifically,":[129],"we":[130,156,192],"process":[131],"infrared":[132],"You":[136],"Only":[137],"Look":[138],"Once":[139],"(YOLO)":[140],"v5":[141],"model":[142,176],"due":[143],"its":[145],"balance":[146],"between":[147],"inference":[148],"time":[149],"accuracy.":[151],"For":[152],"stream,":[155],"evaluate":[157],"Long":[158],"Short-Term":[159],"Memory":[160],"(LSTM)":[161],"Convolutional":[163],"Recurrent":[164],"Neural":[165],"Network":[166],"(CRNN)":[167],"models":[168],"demonstrate":[170,184,204],"superiority":[172],"CRNN":[175],"through":[177],"Mel-Frequency":[178],"Cepstral":[179],"Coefficients":[180],"(MFCC)":[181],"features.":[182],"To":[183],"robustness":[186],"our":[188,220],"audio-visual":[189],"approach,":[191],"validate":[193],"it":[194,230],"extensive":[196],"scenarios,":[197],"with":[198],"impaired":[199],"audio/video":[200],"data.":[201],"results":[203],"that":[205],"multimodal":[206],"significantly":[208],"improves":[209],"detection,":[211],"outperforming":[212],"traditional":[213],"single-modality":[214],"systems":[215],"complex":[217],"environments.":[218],"Additionally,":[219],"system":[221],"provides":[222],"predictions":[223],"rapidly,":[224],"just":[226],"0.382":[227],"seconds,":[228],"well-suited":[231],"real-time":[233],"applications.":[234]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4398131482","counts_by_year":[],"updated_date":"2024-12-14T07:07:30.045121","created_date":"2024-05-21"}