{"id":"https://openalex.org/W146178812","doi":"https://doi.org/10.21437/interspeech.2004-176","title":"Deterministic annealing EM algorithm in parameter estimation for acoustic model","display_name":"Deterministic annealing EM algorithm in parameter estimation for acoustic model","publication_year":2004,"publication_date":"2004-10-04","ids":{"openalex":"https://openalex.org/W146178812","doi":"https://doi.org/10.21437/interspeech.2004-176","mag":"146178812"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2004-176","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048322058","display_name":"Yohei Itaya","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yohei Itaya","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003420204","display_name":"Heiga Zen","orcid":"https://orcid.org/0000-0002-8959-5471"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heiga Zen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023240652","display_name":"Yoshihiko Nankaku","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yoshihiko Nankaku","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108524583","display_name":"Chiyomi Miyajima","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chiyomi Miyajima","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103023678","display_name":"Keiichi Tokuda","orcid":"https://orcid.org/0000-0001-6143-0133"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Keiichi Tokuda","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5059137996","display_name":"Tadashi Kitamura","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tadashi Kitamura","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.554978,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":80,"max":81},"biblio":{"volume":null,"issue":null,"first_page":"433","last_page":"436"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9942,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.5906459}],"concepts":[{"id":"https://openalex.org/C182081679","wikidata":"https://www.wikidata.org/wiki/Q1275153","display_name":"Expectation\u2013maximization algorithm","level":3,"score":0.7524555},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.71606797},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.70577013},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.62163126},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.59846985},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.5906459},{"id":"https://openalex.org/C126980161","wikidata":"https://www.wikidata.org/wiki/Q863783","display_name":"Simulated annealing","level":2,"score":0.4628358},{"id":"https://openalex.org/C167928553","wikidata":"https://www.wikidata.org/wiki/Q1376021","display_name":"Estimation theory","level":2,"score":0.4525397},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.4482041},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.38454962},{"id":"https://openalex.org/C49781872","wikidata":"https://www.wikidata.org/wiki/Q1045555","display_name":"Maximum likelihood","level":2,"score":0.38402408},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34472883},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30731133},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2810783},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.11525273},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2004-176","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, justice, and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.59}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":5,"referenced_works":["https://openalex.org/W2007463795","https://openalex.org/W2049633694","https://openalex.org/W2165880886","https://openalex.org/W2286457797","https://openalex.org/W3148186152"],"related_works":["https://openalex.org/W84255947","https://openalex.org/W2473373438","https://openalex.org/W2385394209","https://openalex.org/W2368486525","https://openalex.org/W2153481672","https://openalex.org/W2153238387","https://openalex.org/W2078124810","https://openalex.org/W2077224612","https://openalex.org/W2041758614","https://openalex.org/W1978153144"],"abstract_inverted_index":{"ABSTRACT":[0],"This":[1,87],"paper":[2],"investigates":[3],"the":[4,7,21,29,34,42,49,58,65,96,105,109,137,142,158,169,175,202,210,214,218,226,253,272,281],"effectiveness":[5,50],"of":[6,82,184,238,245,280],"DAEM":[8,59],"(Determin-istic":[9],"Annealing":[10],"EM)":[11],"algorithm":[12,23,60,75,88,98,139,145],"in":[13,213],"acoustic":[14,126],"modeling":[15,127],"for":[16,79,128,140,146,197,271],"speakerand":[17],"speech":[18,131,188],"recognition.":[19],"Although":[20],"EM":[22,73,97,211,273],"has":[24,33,44],"beenwidely":[25],"used":[26],"to":[27,151,167,193,265,278],"approximate":[28],"ML":[30],"estimates,":[31],"it":[32,103,190,275],"problemof":[35],"initialization":[36],"dependence.":[37],"To":[38],"relax":[39],"this":[40,54,234],"problem,":[41],"DAEMalgorithm":[43],"been":[45,123,149],"proposed":[46],"and":[47,130,174,206,249],"con\ufb01rmed":[48],"insmall":[51],"tasks.":[52],"In":[53,134,233,256],"paper,":[55],"we":[56,259],"applied":[57],"tospeakerrecognitionbasedonGMMsandcontinuousspeechrecog-nitionbasedonHMMs.":[61],"ExperimentalresultsshowthattheDAEMalgorithm":[62],"can":[63],"improve":[64],"recognition":[66],"performance":[67],"as":[68],"comparedtotheordinaryEMalgorithmwithconventionalinitializationmeth-ods,especiallyinthe\ufb02atstarttrainingforcontinuousspeechrecog-nition.":[69],"1.":[70],"INTRODUCTION":[71],"The":[72],"(Expectation-Maximization)":[74],"[1]":[76],"is":[77,99,191,230],"widelyused":[78],"parameter":[80],"estimation":[81],"statistical":[83],"models":[84,247],"with":[85],"hiddenvariables.":[86],"provides":[89],"a":[90,100,223,267],"simple":[91],"iterative":[92],"proceduretoobtainapproximateML(maximumlikelihood)estimates.":[93],"How-ever,":[94],"since":[95],"hill-climbing":[101],"approach,":[102],"suffersfrom":[104],"local":[106,282],"maxima":[107,283],"problem.On":[108],"other":[110],"hand,":[111],"GMMs":[112],"(Gaussian":[113],"mixture":[114],"models)":[115,120],"[2]":[116],"andHMMs":[117],"(hidden":[118],"Markov":[119],"[3]":[121],"have":[122,148,262],"commonly":[124],"usedin":[125],"speaker":[129],"recognition,":[132,189],"respec-tively.":[133],"conventional":[135],"approaches,":[136],"LBG":[138],"GMMsand":[141],"segmental":[143],"k-means":[144],"HMMs":[147,239],"em-ployed":[150],"obtain":[152,194,266],"initial":[153,162,236,269],"model":[154],"parameters":[155,237],"before":[156],"applying":[157],"EMalgorithm.":[159],"However":[160],"these":[161,257],"values":[163,270],"are":[164,240],"not":[165,261],"guaranteed":[166],"benear":[168],"true":[170],"maximum":[171],"likelihood":[172],"point,":[173],"posterior":[176],"den-sity":[177],"becomes":[178],"unreliable":[179],"at":[180],"an":[181],"early":[182],"stage":[183],"training.":[185,255],"Especiallyin":[186],"continuous":[187],"dif\ufb01cult":[192],"accuratephoneme":[195],"boundaries":[196],"all":[198,243,246],"training":[199,229],"data.":[200],"Hence,":[201],"embeddedtraininghasbeenusedinwhichphonemeboundariesarealsodealtas":[203],"hidden":[204],"variables,":[205],"estimated":[207],"based":[208],"on":[209],"algorithm.Furthermore,":[212],"worse":[215],"case":[216],"that":[217],"boundary":[219],"information":[220],"isnot":[221],"available,":[222],"method":[224],"called":[225],"\ufb02at":[227],"start":[228],"often":[231],"ap-plied.":[232],"method,":[235],"given":[241],"bymaking":[242],"states":[244],"equal,":[248],"then":[250],"carry":[251],"out":[252],"em-bedded":[254],"situations,":[258],"do":[260],"enough":[263],"priorknowledge":[264],"good":[268],"algorithm,and":[274],"would":[276],"converge":[277],"one":[279],"or":[284],"saddle":[285],"points":[286]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W146178812","counts_by_year":[{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2024-12-24T16:57:16.057496","created_date":"2016-06-24"}