{"id":"https://openalex.org/W2991571120","doi":"https://doi.org/10.1145/3364908.3365292","title":"A New End-to-End Long-Time Speech Synthesis System Based on Tacotron2","display_name":"A New End-to-End Long-Time Speech Synthesis System Based on Tacotron2","publication_year":2019,"publication_date":"2019-09-20","ids":{"openalex":"https://openalex.org/W2991571120","doi":"https://doi.org/10.1145/3364908.3365292","mag":"2991571120"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3364908.3365292","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047711459","display_name":"Renyuan Liu","orcid":"https://orcid.org/0000-0002-8355-2390"},"institutions":[{"id":"https://openalex.org/I189210763","display_name":"Yunnan University","ror":"https://ror.org/0040axw97","country_code":"CN","type":"education","lineage":["https://openalex.org/I189210763"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Renyuan Liu","raw_affiliation_strings":["Yunnan University Kunming, Yunnan Province, China"],"affiliations":[{"raw_affiliation_string":"Yunnan University Kunming, Yunnan Province, China","institution_ids":["https://openalex.org/I189210763"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037577175","display_name":"Jian Yang","orcid":"https://orcid.org/0000-0003-4887-3444"},"institutions":[{"id":"https://openalex.org/I189210763","display_name":"Yunnan University","ror":"https://ror.org/0040axw97","country_code":"CN","type":"education","lineage":["https://openalex.org/I189210763"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Yang","raw_affiliation_strings":["Yunnan University Kunming, Yunnan Province, China"],"affiliations":[{"raw_affiliation_string":"Yunnan University Kunming, Yunnan Province, China","institution_ids":["https://openalex.org/I189210763"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100705476","display_name":"Mengyuan Liu","orcid":"https://orcid.org/0009-0008-7792-9092"},"institutions":[{"id":"https://openalex.org/I189210763","display_name":"Yunnan University","ror":"https://ror.org/0040axw97","country_code":"CN","type":"education","lineage":["https://openalex.org/I189210763"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengyuan Liu","raw_affiliation_strings":["Yunnan University Kunming, Yunnan Province, China"],"affiliations":[{"raw_affiliation_string":"Yunnan University Kunming, Yunnan Province, China","institution_ids":["https://openalex.org/I189210763"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.343,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.550522,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":82,"max":83},"biblio":{"volume":null,"issue":null,"first_page":"46","last_page":"50"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9934,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9933,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminator","display_name":"Discriminator","score":0.79785466},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.46227297},{"id":"https://openalex.org/keywords/mean-opinion-score","display_name":"Mean opinion score","score":0.44003427}],"concepts":[{"id":"https://openalex.org/C2779803651","wikidata":"https://www.wikidata.org/wiki/Q5282088","display_name":"Discriminator","level":3,"score":0.79785466},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.75743663},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.7104832},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.6959228},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.6749173},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6685779},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.510476},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.46227297},{"id":"https://openalex.org/C62897895","wikidata":"https://www.wikidata.org/wiki/Q1915482","display_name":"Mean opinion score","level":3,"score":0.44003427},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.41672087},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3594085},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.13724172},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.08085111},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.075847596},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3364908.3365292","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.64}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":15,"referenced_works":["https://openalex.org/W2593414223","https://openalex.org/W2604817080","https://openalex.org/W2626778328","https://openalex.org/W2771088323","https://openalex.org/W2794490148","https://openalex.org/W2795109282","https://openalex.org/W2885800352","https://openalex.org/W2892140764","https://openalex.org/W2949382160","https://openalex.org/W2963270749","https://openalex.org/W2963609956","https://openalex.org/W2963971656","https://openalex.org/W2964243274","https://openalex.org/W2972951102","https://openalex.org/W3103403019"],"related_works":["https://openalex.org/W4391272374","https://openalex.org/W40885451","https://openalex.org/W2946856121","https://openalex.org/W2108985546","https://openalex.org/W2081919107","https://openalex.org/W1984347656","https://openalex.org/W1927421023","https://openalex.org/W1914543332","https://openalex.org/W169399214","https://openalex.org/W10581632"],"abstract_inverted_index":{"End-to-end":[0],"speech":[1,31,102,175,188],"synthesis":[2,52,69,176],"breaks":[3],"away":[4],"from":[5],"the":[6,51,55,89,98,105,117,138,151,165,171,174],"original":[7],"system":[8],"framework":[9],"and":[10,54,68],"directly":[11],"converts":[12],"text":[13],"into":[14],"speech.":[15,59],"Although":[16],"it":[17,33],"is":[18,22,42,61,108,154,177],"shown":[19],"that":[20,137],"Tacotron2":[21],"superior":[23],"to":[24,74,147,164,179],"traditional":[25],"piping":[26],"systems":[27],"in":[28,40,44,111],"terms":[29],"of":[30,58,100,104,173,186],"naturalness,":[32],"still":[34],"has":[35,161,189],"many":[36],"defects.":[37],"A":[38],"flaw":[39],"tacotron2":[41],"mentioned":[43,120],"this":[45,76,122],"paper.,":[46],"which":[47],"impacts":[48],"negatively":[49],"upon":[50],"quality":[53],"synthesized":[56],"length":[57,172],"It":[60],"cumulative":[62,118,148],"error":[63,119,149],"between":[64],"training":[65,126],"process":[66,70],"(forward)":[67],"(inference).":[71],"In":[72],"order":[73],"improve":[75],"problem,":[77],"an":[78],"unsupervised":[79],"GAN":[80,93],"(Generative":[81],"Adversarial":[82],"Network)":[83],"model":[84,94],"was":[85],"proposed":[86,92],"based":[87,131],"on":[88,132],"Tacotron2.":[90,133],"The":[91,184],"can":[95],"also":[96,109,190],"optimize":[97],"prosody":[99,106,185],"synthesize":[101],"because":[103],"discriminator":[107],"designed":[110],"our":[112],"model.":[113],"For":[114,157],"further":[115],"reduce":[116],"above,":[121],"paper":[123],"propose":[124],"a":[125,144,159],"strategy":[127],"called":[128],"\"random":[129],"down\"":[130],"And":[134],"then":[135],"demonstrate":[136],"unimportant":[139],"attention":[140,166],"weights":[141],"could":[142],"be":[143],"contributing":[145],"factor":[146],"when":[150],"input":[152],"sequence":[153],"too":[155],"long.":[156],"this,":[158],"window":[160],"been":[162,191],"added":[163],"weights.":[167],"Through":[168],"these":[169],"method,":[170],"improved":[178],"about":[180],"1000":[181],"encoder":[182],"output.":[183],"synthetic":[187],"improved.":[192]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2991571120","counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3}],"updated_date":"2025-01-07T00:41:47.040567","created_date":"2019-12-05"}