{"id":"https://openalex.org/W4392908937","doi":"https://doi.org/10.1109/icassp48485.2024.10447160","title":"Emphasized Non-Target Speaker Knowledge in Knowledge Distillation for Automatic Speaker Verification","display_name":"Emphasized Non-Target Speaker Knowledge in Knowledge Distillation for Automatic Speaker Verification","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392908937","doi":"https://doi.org/10.1109/icassp48485.2024.10447160"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447160","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.1109/icassp48485.2024.10447160","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091500888","display_name":"Duc-Tuan Truong","orcid":"https://orcid.org/0009-0002-1767-7598"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Duc-Tuan Truong","raw_affiliation_strings":["Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026034735","display_name":"Ruijie Tao","orcid":"https://orcid.org/0000-0003-0021-5661"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Ruijie Tao","raw_affiliation_strings":["National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102941083","display_name":"Jia Qi Yip","orcid":"https://orcid.org/0000-0002-9896-9658"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]},{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["SG","US"],"is_corresponding":false,"raw_author_name":"Jia Qi Yip","raw_affiliation_strings":["Alibaba Group","Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004287909","display_name":"Kong Aik Lee","orcid":"https://orcid.org/0000-0001-9133-3000"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Kong Aik Lee","raw_affiliation_strings":["The Hong Kong Polytechnic University, Hong Kong"],"affiliations":[{"raw_affiliation_string":"The Hong Kong Polytechnic University, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070872826","display_name":"Eng Siong Chng","orcid":"https://orcid.org/0000-0001-6257-7399"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Eng Siong Chng","raw_affiliation_strings":["Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institution_assertions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.231,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":2,"citation_normalized_percentile":{"value":0.999975,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":93,"max":96},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9933,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9933,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9635,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker Verification","score":0.74693024},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.5227228}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.79201704},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.74693024},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.7438624},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5922539},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.5498788},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.5227228},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4199857},{"id":"https://openalex.org/C115925183","wikidata":"https://www.wikidata.org/wiki/Q1412694","display_name":"Knowledge-based systems","level":2,"score":0.41679364},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3838258},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447160","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2309.14838","pdf_url":"https://arxiv.org/pdf/2309.14838","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447160","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":22,"referenced_works":["https://openalex.org/W2219249508","https://openalex.org/W2696967604","https://openalex.org/W2890964092","https://openalex.org/W2936733796","https://openalex.org/W2981087920","https://openalex.org/W3015213852","https://openalex.org/W3024869864","https://openalex.org/W3035701170","https://openalex.org/W3166898278","https://openalex.org/W3198804855","https://openalex.org/W3205635414","https://openalex.org/W3209984917","https://openalex.org/W4226426325","https://openalex.org/W4288091954","https://openalex.org/W4296068420","https://openalex.org/W4297841573","https://openalex.org/W4319586299","https://openalex.org/W4319862287","https://openalex.org/W4372340947","https://openalex.org/W4385823178","https://openalex.org/W4385823321","https://openalex.org/W4391423796"],"related_works":["https://openalex.org/W66821593","https://openalex.org/W4247736853","https://openalex.org/W4235705411","https://openalex.org/W2911612049","https://openalex.org/W2206035908","https://openalex.org/W2162158162","https://openalex.org/W2144470400","https://openalex.org/W1999004162","https://openalex.org/W1521299571","https://openalex.org/W1493012537"],"abstract_inverted_index":{"Knowledge":[0],"distillation":[1],"(KD)":[2],"is":[3,109],"used":[4],"to":[5,111,131],"enhance":[6],"automatic":[7,50,72],"speaker":[8,51,73],"verification":[9,74],"performance":[10,70],"by":[11,77,93],"ensuring":[12],"consistency":[13],"between":[14],"large":[15],"teacher":[16],"networks":[17,21],"and":[18,95,117,133],"lightweight":[19],"student":[20,114],"at":[22],"the":[23,30,35,69,81,89,97,127],"embedding":[24],"level":[25],"or":[26],"label":[27],"level.":[28],"However,":[29],"conventional":[31,90,134],"label-level":[32,91,135],"KD":[33,92,136],"overlooks":[34],"significant":[36],"knowledge":[37,104],"from":[38],"non-target":[39,66,84,101],"speakers,":[40],"particularly":[41],"their":[42],"classification":[43,98],"probabilities,":[44],"which":[45],"can":[46],"be":[47],"crucial":[48],"for":[49],"verification.":[52],"In":[53],"this":[54,78],"paper,":[55],"we":[56,87],"first":[57],"demonstrate":[58],"that":[59],"leveraging":[60],"a":[61],"larger":[62],"number":[63],"of":[64,71,83,100,121],"training":[65],"speakers":[67,102],"improves":[68],"models.":[75],"Inspired":[76],"finding":[79],"about":[80],"importance":[82],"speakers'":[85],"knowledge,":[86],"modified":[88],"disentangling":[94],"emphasizing":[96],"probabilities":[99],"during":[103],"distillation.":[105],"The":[106],"proposed":[107],"method":[108],"applied":[110],"three":[112],"different":[113],"model":[115],"architectures":[116],"achieves":[118],"an":[119],"average":[120],"13.67%":[122],"improvement":[123],"in":[124],"EER":[125],"on":[126],"VoxCeleb":[128],"dataset":[129],"compared":[130],"embedding-level":[132],"methods.":[137],"1":[140]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4392908937","counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2024-12-07T04:32:14.821435","created_date":"2024-03-19"}