{"id":"https://openalex.org/W2141102245","doi":"https://doi.org/10.1109/jstsp.2010.2069750","title":"Long-Term Spectro-Temporal and Static Harmonic Features for Voice Activity Detection","display_name":"Long-Term Spectro-Temporal and Static Harmonic Features for Voice Activity Detection","publication_year":2010,"publication_date":"2010-08-27","ids":{"openalex":"https://openalex.org/W2141102245","doi":"https://doi.org/10.1109/jstsp.2010.2069750","mag":"2141102245"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2010.2069750","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037089480","display_name":"Takashi Fukuda","orcid":"https://orcid.org/0000-0001-9599-6274"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"funder","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Takashi Fukuda","raw_affiliation_strings":["IBM Res. - Tokyo, Yamato, Japan"],"affiliations":[{"raw_affiliation_string":"IBM Res. - Tokyo, Yamato, Japan","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033500018","display_name":"Osamu Ichikawa","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"funder","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Osamu Ichikawa","raw_affiliation_strings":["IBM Res. - Tokyo, Yamato, Japan"],"affiliations":[{"raw_affiliation_string":"IBM Res. - Tokyo, Yamato, Japan","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083574298","display_name":"Masafumi Nishimura","orcid":"https://orcid.org/0000-0001-7633-9340"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"funder","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Masafumi Nishimura","raw_affiliation_strings":["IBM Res. - Tokyo, Yamato, Japan"],"affiliations":[{"raw_affiliation_string":"IBM Res. - Tokyo, Yamato, Japan","institution_ids":["https://openalex.org/I1341412227"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.384,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":40,"citation_normalized_percentile":{"value":0.900423,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":93,"max":94},"biblio":{"volume":"4","issue":"5","first_page":"834","last_page":"844"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness","score":0.7415128},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.70663285},{"id":"https://openalex.org/keywords/cepstrum","display_name":"Cepstrum","score":0.6661757},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5412002},{"id":"https://openalex.org/keywords/harmonic","display_name":"Harmonic","score":0.4274238}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.78139645},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7760024},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7415128},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.70663285},{"id":"https://openalex.org/C88485024","wikidata":"https://www.wikidata.org/wiki/Q1054571","display_name":"Cepstrum","level":2,"score":0.6661757},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.66464996},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5412002},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4974337},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4834631},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.4488627},{"id":"https://openalex.org/C127934551","wikidata":"https://www.wikidata.org/wiki/Q1148098","display_name":"Harmonic","level":2,"score":0.4274238},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.42402577},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38892782},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.38828817},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37946597},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.33988625},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.111412585},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2010.2069750","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.61,"display_name":"Peace, justice, and strong institutions"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":25,"referenced_works":["https://openalex.org/W180772233","https://openalex.org/W190289757","https://openalex.org/W1971332722","https://openalex.org/W1999686891","https://openalex.org/W2005317035","https://openalex.org/W2075012882","https://openalex.org/W2105371879","https://openalex.org/W2115717467","https://openalex.org/W2117321034","https://openalex.org/W2121973264","https://openalex.org/W2129120544","https://openalex.org/W2140686411","https://openalex.org/W2149457403","https://openalex.org/W2153754015","https://openalex.org/W2163288271","https://openalex.org/W2163667343","https://openalex.org/W2166607750","https://openalex.org/W2167763959","https://openalex.org/W2171748469","https://openalex.org/W23754177","https://openalex.org/W2574270357","https://openalex.org/W289873009","https://openalex.org/W3147539069","https://openalex.org/W59508977","https://openalex.org/W64868312"],"related_works":["https://openalex.org/W4385672897","https://openalex.org/W2387604097","https://openalex.org/W2373675101","https://openalex.org/W2359140082","https://openalex.org/W2160511961","https://openalex.org/W2074132948","https://openalex.org/W2066371342","https://openalex.org/W2018086531","https://openalex.org/W1980297060","https://openalex.org/W106160982"],"abstract_inverted_index":{"Accurate":[0],"voice":[1],"activity":[2],"detection":[3],"(VAD)":[4],"is":[5,85],"important":[6],"for":[7,47],"robust":[8],"automatic":[9],"speech":[10],"recognition":[11],"(ASR)":[12],"systems.":[13],"This":[14,90],"paper":[15,91],"proposes":[16],"a":[17,59,94,158,162],"statistical-model-based":[18],"noise-robust":[19],"VAD":[20,145],"algorithm":[21],"using":[22],"long-term":[23,116],"temporal":[24,32,56,117],"information":[25,33,78,84,102],"and":[26,118],"harmonic-structure-based":[27],"features":[28,57,122],"in":[29,58,79,143,157],"speech.":[30],"Long-term":[31],"has":[34,41],"recently":[35],"become":[36],"an":[37],"ASR":[38,164],"focus,":[39],"but":[40,82],"not":[42],"yet":[43],"been":[44],"deeply":[45],"investigated":[46],"VAD.":[48],"In":[49,68,147],"this":[50],"paper,":[51],"we":[52],"first":[53],"consider":[54],"the":[55,64,70,99,115,119,140,149],"cepstral":[60],"domain":[61],"calculated":[62],"over":[63],"average":[65,136],"phoneme":[66],"duration.":[67],"contrast,":[69],"harmonic":[71,100,121],"structures":[72],"are":[73],"well-known":[74],"bearers":[75],"of":[76],"acoustic":[77],"human":[80],"voices,":[81],"that":[83,160],"difficult":[86],"to":[87,97,124],"exploit":[88,98],"statistically.":[89],"further":[92],"describes":[93],"new":[95],"method":[96,112],"structure":[101],"with":[103,131,139],"statistical":[104],"models,":[105],"providing":[106],"additional":[107],"noise":[108],"robustness.":[109],"The":[110],"proposed":[111],"including":[113],"both":[114],"static":[120],"led":[123],"considerable":[125],"improvements":[126],"under":[127],"low":[128],"SNR":[129],"conditions,":[130],"77.7%":[132],"error":[133,151],"reduction":[134],"on":[135],"as":[137],"compared":[138],"ETSI":[141],"AFE-VAD":[142],"our":[144],"testing.":[146],"addition,":[148],"word":[150],"rate":[152],"was":[153],"reduced":[154],"by":[155],"29.1%":[156],"test":[159],"included":[161],"full":[163],"system.":[165]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2141102245","counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":6},{"year":2014,"cited_by_count":8},{"year":2013,"cited_by_count":7},{"year":2012,"cited_by_count":2}],"updated_date":"2025-04-19T10:16:48.766585","created_date":"2016-06-24"}