{"id":"https://openalex.org/W4392682212","doi":"https://doi.org/10.48550/arxiv.2403.05380","title":"Spectrogram-Based Detection of Auto-Tuned Vocals in Music Recordings","display_name":"Spectrogram-Based Detection of Auto-Tuned Vocals in Music Recordings","publication_year":2024,"publication_date":"2024-03-08","ids":{"openalex":"https://openalex.org/W4392682212","doi":"https://doi.org/10.48550/arxiv.2403.05380"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.05380","pdf_url":"http://arxiv.org/pdf/2403.05380","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2403.05380","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5094123887","display_name":"Mahyar Gohari","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gohari, Mahyar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051370303","display_name":"Paolo Bestagini","orcid":"https://orcid.org/0000-0003-0406-0222"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bestagini, Paolo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004055341","display_name":"Sergio Benini","orcid":"https://orcid.org/0000-0003-2152-9424"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Benini, Sergio","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5085695707","display_name":"Nicola Adami","orcid":"https://orcid.org/0000-0002-8879-9456"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Adami, Nicola","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9947,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9947,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9758,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.8712572}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.8712572},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.45044106},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.38241708},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.351448},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.34376222},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.14226425}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.05380","pdf_url":"http://arxiv.org/pdf/2403.05380","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.05380","pdf_url":"http://arxiv.org/pdf/2403.05380","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2948807893","https://openalex.org/W2935909890","https://openalex.org/W2897924318","https://openalex.org/W2778153218","https://openalex.org/W2748952813","https://openalex.org/W2138997758","https://openalex.org/W1531601525"],"abstract_inverted_index":{"In":[0],"the":[1,9,16,26,37,87,111,118,134,137],"domain":[2],"of":[3,11,15,28,49,89,113,120,124,136],"music":[4,74,80],"production":[5],"and":[6,44,62,69,83,126,143,154],"audio":[7,128,159],"processing,":[8],"implementation":[10],"automatic":[12],"pitch":[13],"correction":[14],"singing":[17],"voice,":[18],"also":[19,54],"known":[20],"as":[21],"Auto-Tune,":[22],"has":[23,34,53,76,95],"significantly":[24],"transformed":[25],"landscape":[27],"vocal":[29,42],"performance.":[30],"While":[31],"auto-tuning":[32],"technology":[33],"offered":[35],"musicians":[36],"ability":[38],"to":[39,86,146],"tune":[40],"their":[41],"pitches":[43],"achieve":[45],"a":[46,66,104,121],"desired":[47],"level":[48],"precision,":[50],"its":[51,58],"use":[52],"sparked":[55],"debates":[56],"regarding":[57],"impact":[59],"on":[60],"authenticity":[61],"artistic":[63],"integrity.":[64],"As":[65],"result,":[67],"detecting":[68],"analyzing":[70],"Auto-Tuned":[71,114,127],"vocals":[72],"in":[73,98,140],"recordings":[75],"become":[77],"essential":[78],"for":[79,110,152,157],"scholars,":[81],"producers,":[82],"listeners.":[84],"However,":[85],"best":[88],"our":[90],"knowledge,":[91],"no":[92],"prior":[93],"effort":[94],"been":[96],"made":[97],"this":[99],"direction.":[100],"This":[101],"study":[102],"introduces":[103],"data-driven":[105],"approach":[106],"leveraging":[107],"triplet":[108],"networks":[109],"detection":[112],"songs,":[115],"backed":[116],"by":[117],"creation":[119],"dataset":[122],"composed":[123],"original":[125],"clips.":[129],"The":[130],"experimental":[131],"results":[132],"demonstrate":[133],"superiority":[135],"proposed":[138,151],"method":[139],"both":[141],"accuracy":[142],"robustness":[144],"compared":[145],"Rawnet2,":[147],"an":[148],"end-to-end":[149],"model":[150],"anti-spoofing":[153],"widely":[155],"used":[156],"other":[158],"forensic":[160],"tasks.":[161]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4392682212","counts_by_year":[],"updated_date":"2024-12-15T19:57:19.467366","created_date":"2024-03-13"}