{"id":"https://openalex.org/W2106792148","doi":"https://doi.org/10.1093/ietisy/e90-d.5.825","title":"A Hidden Semi-Markov Model-Based Speech Synthesis System","display_name":"A Hidden Semi-Markov Model-Based Speech Synthesis System","publication_year":2007,"publication_date":"2007-05-01","ids":{"openalex":"https://openalex.org/W2106792148","doi":"https://doi.org/10.1093/ietisy/e90-d.5.825","mag":"2106792148"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1093/ietisy/e90-d.5.825","pdf_url":null,"source":{"id":"https://openalex.org/S2486202937","display_name":"IEICE Transactions on Information and Systems","issn_l":"0916-8532","issn":["0916-8532","1745-1361"],"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4320800604","host_organization_name":"Institute of Electronics, Information and Communication Engineers","host_organization_lineage":["https://openalex.org/P4320800604"],"host_organization_lineage_names":["Institute of Electronics, Information and Communication Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003420204","display_name":"Heiga Zen","orcid":"https://orcid.org/0000-0002-8959-5471"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"H. ZEN","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038041333","display_name":"Keiichi Tokuda","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"K. TOKUDA","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064866374","display_name":"Takashi Masuko","orcid":"https://orcid.org/0000-0002-2410-2007"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"T. MASUKO","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008691845","display_name":"Takao Kobayasih","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"T. KOBAYASIH","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5060635358","display_name":"Takashi Kitamura","orcid":"https://orcid.org/0000-0001-5597-859X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"T. KITAMURA","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":19.046,"has_fulltext":false,"cited_by_count":206,"citation_normalized_percentile":{"value":0.999963,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"E90-D","issue":"5","first_page":"825","last_page":"834"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9973,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9964,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hidden-semi-markov-model","display_name":"Hidden semi-Markov model","score":0.61673295}],"concepts":[{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.86988235},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.78052545},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.71571743},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6493307},{"id":"https://openalex.org/C64939953","wikidata":"https://www.wikidata.org/wiki/Q3859882","display_name":"Hidden semi-Markov model","level":5,"score":0.61673295},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.6107779},{"id":"https://openalex.org/C163836022","wikidata":"https://www.wikidata.org/wiki/Q6771326","display_name":"Markov model","level":3,"score":0.5176792},{"id":"https://openalex.org/C112758219","wikidata":"https://www.wikidata.org/wiki/Q16038819","display_name":"Duration (music)","level":2,"score":0.51082516},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.48065436},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36164963},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.29131055},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.21826264},{"id":"https://openalex.org/C189973286","wikidata":"https://www.wikidata.org/wiki/Q176695","display_name":"Markov property","level":4,"score":0.14999682},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.057948977},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1093/ietisy/e90-d.5.825","pdf_url":null,"source":{"id":"https://openalex.org/S2486202937","display_name":"IEICE Transactions on Information and Systems","issn_l":"0916-8532","issn":["0916-8532","1745-1361"],"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4320800604","host_organization_name":"Institute of Electronics, Information and Communication Engineers","host_organization_lineage":["https://openalex.org/P4320800604"],"host_organization_lineage_names":["Institute of Electronics, Information and Communication Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, justice, and strong institutions","score":0.42,"id":"https://metadata.un.org/sdg/16"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":38,"referenced_works":["https://openalex.org/W120828","https://openalex.org/W133559434","https://openalex.org/W1507900585","https://openalex.org/W1512429158","https://openalex.org/W1514737389","https://openalex.org/W1563645159","https://openalex.org/W1600722501","https://openalex.org/W175280642","https://openalex.org/W1847897332","https://openalex.org/W1861150963","https://openalex.org/W1965255698","https://openalex.org/W1976551160","https://openalex.org/W1980719954","https://openalex.org/W2049633694","https://openalex.org/W2049686551","https://openalex.org/W2050597349","https://openalex.org/W2056191164","https://openalex.org/W2064218608","https://openalex.org/W2068970468","https://openalex.org/W2077737920","https://openalex.org/W2083393647","https://openalex.org/W2096555739","https://openalex.org/W2109772895","https://openalex.org/W2116952749","https://openalex.org/W2125838338","https://openalex.org/W2144139079","https://openalex.org/W2150658333","https://openalex.org/W2154920538","https://openalex.org/W2228674556","https://openalex.org/W2395578248","https://openalex.org/W2915907762","https://openalex.org/W3036214897","https://openalex.org/W3036802551","https://openalex.org/W3217250306","https://openalex.org/W3629425","https://openalex.org/W36786594","https://openalex.org/W75668230","https://openalex.org/W80543058"],"related_works":["https://openalex.org/W4246505579","https://openalex.org/W2799426416","https://openalex.org/W2792905593","https://openalex.org/W2537260108","https://openalex.org/W2379938888","https://openalex.org/W2134386692","https://openalex.org/W2116722627","https://openalex.org/W1977445474","https://openalex.org/W175280642","https://openalex.org/W1510894296"],"abstract_inverted_index":{"A":[0],"statistical":[1,110],"speech":[2,23,31,45,100,111],"synthesis":[3,46,80,112,154],"system":[4,42,113],"based":[5,56,114],"on":[6,57,115],"the":[7,38,58,79,83,98,139,146,153,156,160,172],"hidden":[8,117],"Markov":[9],"model":[10,51,119],"(HMM)":[11],"was":[12],"recently":[13],"proposed.":[14],"In":[15,104],"this":[16,105],"system,":[17,84],"spectrum,":[18],"excitation,":[19],"and":[20,30,53,155],"duration":[21,70,131,148],"of":[22,82,135,159,169,175],"are":[24,35,75],"modeled":[25],"simultaneously":[26],"by":[27],"context-dependent":[28],"HMMs,":[29],"parameter":[32],"vector":[33],"sequences":[34],"generated":[36],"from":[37],"HMMs":[39],"themselves.":[40],"This":[41,94],"defines":[43],"a":[44,49,109,116],"problem":[47],"in":[48,78],"generative":[50],"framework":[52],"solves":[54],"it":[55],"maximum":[59],"likelihood":[60],"(ML)":[61],"criterion.":[62],"However,":[63],"there":[64],"is":[65],"an":[66,126],"inconsistency:":[67],"although":[68],"state":[69,130,147],"probability":[71],"density":[72],"functions":[73],"(PDFs)":[74],"explicitly":[76,150],"used":[77],"part":[81],"they":[85],"have":[86],"not":[87],"been":[88],"incorporated":[89],"into":[90,151],"its":[91],"training":[92,157],"part.":[93],"inconsistency":[95,141],"can":[96,122,137,144],"make":[97],"synthesized":[99,176],"sound":[101],"less":[102],"natural.":[103],"paper,":[106],"we":[107,143],"propose":[108],"semi-Markov":[118],"(HSMM),":[120],"which":[121],"be":[123],"viewed":[124],"as":[125],"HMM":[127],"with":[128],"explicit":[129],"PDFs.":[132],"The":[133],"use":[134,168],"HSMMs":[136,170],"solve":[138],"above":[140],"because":[142],"incorporate":[145],"PDFs":[149],"both":[152],"parts":[158],"system.":[161],"Subjective":[162],"listening":[163],"test":[164],"results":[165],"show":[166],"that":[167],"improves":[171],"reported":[173],"naturalness":[174],"speech.":[177]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2106792148","counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":7},{"year":2018,"cited_by_count":13},{"year":2017,"cited_by_count":13},{"year":2016,"cited_by_count":21},{"year":2015,"cited_by_count":19},{"year":2014,"cited_by_count":25},{"year":2013,"cited_by_count":19},{"year":2012,"cited_by_count":13}],"updated_date":"2025-04-19T03:26:17.845853","created_date":"2016-06-24"}