{"id":"https://openalex.org/W4221146823","doi":"https://doi.org/10.48550/arxiv.2203.10473","title":"ECAPA-TDNN for Multi-speaker Text-to-speech Synthesis","display_name":"ECAPA-TDNN for Multi-speaker Text-to-speech Synthesis","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4221146823","doi":"https://doi.org/10.48550/arxiv.2203.10473"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2203.10473","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2203.10473","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033565985","display_name":"Jinlong Xue","orcid":"https://orcid.org/0009-0000-0442-0932"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xue, Jinlong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008597430","display_name":"Yayue Deng","orcid":"https://orcid.org/0009-0003-7642-4942"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Yayue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088698551","display_name":"Yichen Han","orcid":"https://orcid.org/0000-0003-4915-3916"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Yichen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100343662","display_name":"Ya Li","orcid":"https://orcid.org/0000-0002-6284-5039"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Ya","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113103312","display_name":"Jianqing Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Jianqing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5110832899","display_name":"Jiaen Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Jiaen","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":60},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9869,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9603,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5516403},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.5166393}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.83281386},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7452519},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.71848},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.62329847},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5985404},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5867132},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5517938},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5516403},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.5166393},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.46136656},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4114284},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2203.10473","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2203.10473","pdf_url":"http://arxiv.org/pdf/2203.10473","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2203.10473","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2203.10473","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.68,"id":"https://metadata.un.org/sdg/4","display_name":"Quality education"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391272374","https://openalex.org/W2946856121","https://openalex.org/W2535215250","https://openalex.org/W2433276473","https://openalex.org/W2206035908","https://openalex.org/W2108985546","https://openalex.org/W2081919107","https://openalex.org/W1984347656","https://openalex.org/W1914543332","https://openalex.org/W1537411440"],"abstract_inverted_index":{"In":[0,32],"recent":[1],"years,":[2],"neural":[3],"network":[4],"based":[5,73,88,125],"methods":[6,25,129,136],"for":[7,55],"multi-speaker":[8],"text-to-speech":[9],"synthesis":[10],"(TTS)":[11],"have":[12],"made":[13],"significant":[14],"progress.":[15],"However,":[16],"the":[17,75,119],"current":[18],"speaker":[19,30,39,71,83,98],"encoder":[20,40,72,99],"models":[21,100],"used":[22],"in":[23,140],"these":[24,135],"still":[26],"cannot":[27],"capture":[28],"enough":[29],"information.":[31],"this":[33],"paper,":[34],"we":[35,117],"focus":[36],"on":[37,74],"accurate":[38],"modeling":[41],"and":[42,52,58,90,109,134],"propose":[43],"an":[44],"end-to-end":[45],"method":[46,104],"that":[47],"can":[48,105],"generate":[49],"high-quality":[50],"speech":[51,142],"better":[53,107],"similarity":[54],"both":[56],"seen":[57],"unseen":[59],"speakers.":[60],"The":[61,94],"proposed":[62,103],"architecture":[63],"consists":[64],"of":[65],"three":[66],"separately":[67],"trained":[68],"components:":[69],"a":[70,86,91],"state-of-the-art":[76],"ECAPA-TDNN":[77],"model":[78],"which":[79],"is":[80],"derived":[81],"from":[82],"verification":[84],"task,":[85],"FastSpeech2":[87],"synthesizer,":[89],"HiFi-GAN":[92],"vocoder.":[93],"comparison":[95],"among":[96],"different":[97],"shows":[101],"our":[102,114,132],"achieve":[106],"naturalness":[108],"similarity.":[110],"To":[111],"efficiently":[112],"evaluate":[113],"synthesized":[115],"speech,":[116],"are":[118],"first":[120],"to":[121,130],"adopt":[122],"deep":[123],"learning":[124],"automatic":[126,141],"MOS":[127],"evaluation":[128],"assess":[131],"results,":[133],"show":[137],"great":[138],"potential":[139],"quality":[143],"assessment.":[144]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4221146823","counts_by_year":[],"updated_date":"2024-12-16T05:48:02.343344","created_date":"2022-04-03"}