{"id":"https://openalex.org/W4386875031","doi":"https://doi.org/10.48550/arxiv.2309.08837","title":"FastGraphTTS: An Ultrafast Syntax-Aware Speech Synthesis Framework","display_name":"FastGraphTTS: An Ultrafast Syntax-Aware Speech Synthesis Framework","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4386875031","doi":"https://doi.org/10.48550/arxiv.2309.08837"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2309.08837","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2309.08837","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074472751","display_name":"Jianzong Wang","orcid":"https://orcid.org/0000-0002-9237-4231"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jianzong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028437800","display_name":"Xulong Zhang","orcid":"https://orcid.org/0000-0001-7005-992X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xulong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022188380","display_name":"Aolan Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Aolan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065576761","display_name":"Ning Cheng","orcid":"https://orcid.org/0000-0003-0909-5328"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Ning","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5016038454","display_name":"Jing Xiao","orcid":"https://orcid.org/0000-0001-9615-4749"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Jing","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":65},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9968,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9928,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-translation","display_name":"Speech translation","score":0.5233716},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.5136623},{"id":"https://openalex.org/keywords/discriminator","display_name":"Discriminator","score":0.42088896}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8576445},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.601399},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.5606046},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5302427},{"id":"https://openalex.org/C2780366754","wikidata":"https://www.wikidata.org/wiki/Q7494857","display_name":"Speech translation","level":3,"score":0.5233716},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5146633},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.5136623},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5017226},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.4335849},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.42629433},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.42512527},{"id":"https://openalex.org/C2779803651","wikidata":"https://www.wikidata.org/wiki/Q5282088","display_name":"Discriminator","level":3,"score":0.42088896},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.35078573},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.11951545},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.10562405},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2309.08837","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.08837","pdf_url":"http://arxiv.org/pdf/2309.08837","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2309.08837","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2309.08837","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"display_name":"Quality education","id":"https://metadata.un.org/sdg/4","score":0.55}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4380714744","https://openalex.org/W4319453655","https://openalex.org/W4293202849","https://openalex.org/W2990005675","https://openalex.org/W2964074194","https://openalex.org/W2387995142","https://openalex.org/W2374317326","https://openalex.org/W2089959425","https://openalex.org/W1980965563","https://openalex.org/W1489300767"],"abstract_inverted_index":{"This":[0],"paper":[1],"integrates":[2],"graph-to-sequence":[3],"into":[4],"an":[5],"end-to-end":[6],"text-to-speech":[7],"framework":[8],"for":[9],"syntax-aware":[10],"modelling":[11],"with":[12,53,139],"syntactic":[13,32,35,47],"information":[14],"of":[15,85,119,125,134],"input":[16,20,57,100],"text.":[17],"Specifically,":[18],"the":[19,46,59,67,111,117,123,126,132,135],"text":[21,101],"is":[22,37,51,73,128],"parsed":[23],"by":[24,40],"a":[25,31,41],"dependency":[26],"parsing":[27],"module":[28],"to":[29,44,58,65],"form":[30],"graph.":[33],"The":[34,71],"graph":[36,42],"then":[38],"encoded":[39],"encoder":[43],"extract":[45],"hidden":[48],"information,":[49],"which":[50],"concatenated":[52],"phoneme":[54],"embedding":[55],"and":[56,61,79,88,102,105,115],"alignment":[60],"flow-based":[62],"decoding":[63],"modules":[64],"generate":[66],"raw":[68],"audio":[69],"waveform.":[70],"model":[72,127],"experimented":[74],"on":[75],"two":[76],"languages,":[77],"English":[78],"Mandarin,":[80],"using":[81],"single-speaker,":[82],"few":[83],"samples":[84],"target":[86],"speakers,":[87],"multi-speaker":[89],"datasets,":[90],"respectively.":[91],"Experimental":[92],"results":[93],"show":[94,116],"better":[95],"prosodic":[96,113],"consistency":[97],"performance":[98],"between":[99],"generated":[103],"audio,":[104],"also":[106],"get":[107],"higher":[108],"scores":[109],"in":[110],"subjective":[112],"evaluation,":[114],"ability":[118],"voice":[120],"conversion.":[121],"Besides,":[122],"efficiency":[124],"largely":[129],"boosted":[130],"through":[131],"design":[133],"AI":[136],"chip":[137],"operator":[138],"5x":[140],"acceleration.":[141]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4386875031","counts_by_year":[],"updated_date":"2025-04-12T23:23:32.868816","created_date":"2023-09-20"}