{"id":"https://openalex.org/W4388327733","doi":"https://doi.org/10.48550/arxiv.2311.00489","title":"Deep Neural Networks for Automatic Speaker Recognition Do Not Learn Supra-Segmental Temporal Features","display_name":"Deep Neural Networks for Automatic Speaker Recognition Do Not Learn Supra-Segmental Temporal Features","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4388327733","doi":"https://doi.org/10.48550/arxiv.2311.00489"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2311.00489","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2311.00489","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005900677","display_name":"Daniel Neururer","orcid":"https://orcid.org/0000-0003-0972-6556"},"institutions":[{"id":"https://openalex.org/I858936495","display_name":"ZHAW Zurich University of Applied Sciences","ror":"https://ror.org/05pmsvm27","country_code":"CH","type":"funder","lineage":["https://openalex.org/I858936495"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Daniel Neururer","raw_affiliation_strings":["Centre for Artificial Intelligence, Zurich University of Applied Sciences, Technikumstrasse 71, 8400, Winterthur, Switzerland"],"affiliations":[{"raw_affiliation_string":"Centre for Artificial Intelligence, Zurich University of Applied Sciences, Technikumstrasse 71, 8400, Winterthur, Switzerland","institution_ids":["https://openalex.org/I858936495"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028554788","display_name":"Volker Dellwo","orcid":"https://orcid.org/0000-0002-8494-6025"},"institutions":[{"id":"https://openalex.org/I202697423","display_name":"University of Zurich","ror":"https://ror.org/02crff812","country_code":"CH","type":"funder","lineage":["https://openalex.org/I202697423"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Volker Dellwo","raw_affiliation_strings":["Department of Computational Linguistics, University of Zurich, Andreasstrasse 15, 8050, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Department of Computational Linguistics, University of Zurich, Andreasstrasse 15, 8050, Zurich, Switzerland","institution_ids":["https://openalex.org/I202697423"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021570324","display_name":"Thilo Stadelmann","orcid":"https://orcid.org/0000-0002-3784-0420"},"institutions":[{"id":"https://openalex.org/I4210137980","display_name":"European Centre for Living Technology","ror":"https://ror.org/04kesq777","country_code":"IT","type":"facility","lineage":["https://openalex.org/I4210137980"]},{"id":"https://openalex.org/I858936495","display_name":"ZHAW Zurich University of Applied Sciences","ror":"https://ror.org/05pmsvm27","country_code":"CH","type":"funder","lineage":["https://openalex.org/I858936495"]}],"countries":["CH","IT"],"is_corresponding":false,"raw_author_name":"Thilo Stadelmann","raw_affiliation_strings":["Centre for Artificial Intelligence, Zurich University of Applied Sciences, Technikumstrasse 71, 8400, Winterthur, Switzerland","European Centre for Living Technology (ECLT), Ca' Bottacin, Dorsoduro 3911, 30123, Venice, Italy"],"affiliations":[{"raw_affiliation_string":"European Centre for Living Technology (ECLT), Ca' Bottacin, Dorsoduro 3911, 30123, Venice, Italy","institution_ids":["https://openalex.org/I4210137980"]},{"raw_affiliation_string":"Centre for Artificial Intelligence, Zurich University of Applied Sciences, Technikumstrasse 71, 8400, Winterthur, Switzerland","institution_ids":["https://openalex.org/I858936495"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5028554788"],"corresponding_institution_ids":["https://openalex.org/I202697423"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":65},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9935,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep Neural Networks","score":0.5566724}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7804867},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.69263875},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6315885},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5899565},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5644028},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.56114084},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.5566724},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.52993244},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5141037},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.505168},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3417657},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2311.00489","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2311.00489","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2311.00489","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.57,"display_name":"Peace, justice, and strong institutions","id":"https://metadata.un.org/sdg/16"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":36,"referenced_works":["https://openalex.org/W1614298861","https://openalex.org/W1936725236","https://openalex.org/W1974932989","https://openalex.org/W2015919351","https://openalex.org/W2031780801","https://openalex.org/W2067780165","https://openalex.org/W2143612262","https://openalex.org/W2194775991","https://openalex.org/W2199470882","https://openalex.org/W2726515241","https://openalex.org/W2748488820","https://openalex.org/W2772376594","https://openalex.org/W2884643232","https://openalex.org/W2885140592","https://openalex.org/W2916104401","https://openalex.org/W2936774411","https://openalex.org/W2952533820","https://openalex.org/W2962898354","https://openalex.org/W2998376432","https://openalex.org/W3004368638","https://openalex.org/W3013020904","https://openalex.org/W3015302980","https://openalex.org/W3095992690","https://openalex.org/W3105432754","https://openalex.org/W3137249133","https://openalex.org/W3138678423","https://openalex.org/W3149839747","https://openalex.org/W3156052130","https://openalex.org/W3163479226","https://openalex.org/W3171849353","https://openalex.org/W3186197713","https://openalex.org/W4210694145","https://openalex.org/W4221167533","https://openalex.org/W4226434736","https://openalex.org/W4248480789","https://openalex.org/W4299555621"],"related_works":["https://openalex.org/W4377865163","https://openalex.org/W4315865067","https://openalex.org/W4298287631","https://openalex.org/W3208304128","https://openalex.org/W3193857078","https://openalex.org/W3008584592","https://openalex.org/W3000197790","https://openalex.org/W2979433843","https://openalex.org/W2953061907","https://openalex.org/W2888956734"],"abstract_inverted_index":{"While":[0],"deep":[1,167],"neural":[2,80,118],"networks":[3,81],"have":[4],"shown":[5],"impressive":[6],"results":[7,136],"in":[8,37,55],"automatic":[9],"speaker":[10,83,122],"recognition":[11,84,123],"and":[12,66,91,105,116,154],"related":[13],"tasks,":[14],"it":[15],"is":[16,20,25],"dissatisfactory":[17],"how":[18],"little":[19],"understood":[21],"about":[22],"what":[23,74],"exactly":[24],"responsible":[26],"for":[27,82,121,142,169],"these":[28],"results.":[29],"Part":[30],"of":[31,53,78,114,149,161,166],"the":[32,76,150,158],"success":[33],"has":[34],"been":[35],"attributed":[36],"prior":[38],"work":[39],"to":[40,43,57,71,73,96,100,128],"their":[41,107],"capability":[42],"model":[44,126],"supra-segmental":[45],"temporal":[46],"information":[47],"(SST),":[48],"i.e.,":[49],"learn":[50],"rhythmic-prosodic":[51],"characteristics":[52],"speech":[54,152,170],"addition":[56],"spectral":[58],"features.":[59],"In":[60],"this":[61],"paper,":[62],"we":[63],"(i)":[64],"present":[65,93],"apply":[67],"a":[68,112,138],"novel":[69],"test":[70],"quantify":[72],"extent":[75],"performance":[77],"state-of-the-art":[79],"can":[85],"be":[86],"explained":[87],"by":[88],"modeling":[89],"SST;":[90],"(ii)":[92],"several":[94],"means":[95],"force":[97],"respective":[98],"nets":[99],"focus":[101],"more":[102],"on":[103],"SST":[104,127],"evaluate":[106],"merits.":[108],"We":[109],"find":[110],"that":[111],"variety":[113],"CNN-":[115],"RNN-based":[117],"network":[119],"architectures":[120],"do":[124],"not":[125],"any":[129],"sufficient":[130],"degree,":[131],"even":[132],"when":[133],"forced.":[134],"The":[135],"provide":[137],"highly":[139],"relevant":[140],"basis":[141],"impactful":[143],"future":[144],"research":[145],"into":[146,157],"better":[147],"exploitation":[148],"full":[151],"signal":[153],"give":[155],"insights":[156],"inner":[159],"workings":[160],"such":[162],"networks,":[163],"enhancing":[164],"explainability":[165],"learning":[168],"technologies.":[171]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4388327733","counts_by_year":[],"updated_date":"2025-04-08T20:10:01.075370","created_date":"2023-11-04"}