{"id":"https://openalex.org/W2940111934","doi":"https://doi.org/10.1109/icassp.2019.8682710","title":"M-vectors: Sub-band Based Energy Modulation Features for Multi-stream Automatic Speech Recognition","display_name":"M-vectors: Sub-band Based Energy Modulation Features for Multi-stream Automatic Speech Recognition","publication_year":2019,"publication_date":"2019-04-17","ids":{"openalex":"https://openalex.org/W2940111934","doi":"https://doi.org/10.1109/icassp.2019.8682710","mag":"2940111934"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8682710","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021725828","display_name":"Samik Sadhu","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samik Sadhu","raw_affiliation_strings":["Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD, USA"],"affiliations":[{"raw_affiliation_string":"Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100760516","display_name":"Ruizhi Li","orcid":"https://orcid.org/0000-0002-2496-5224"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ruizhi Li","raw_affiliation_strings":["Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD, USA"],"affiliations":[{"raw_affiliation_string":"Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042260050","display_name":"Hynek He\u0159mansk\u00fd","orcid":"https://orcid.org/0000-0001-8032-4811"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hynek Hermansky","raw_affiliation_strings":["Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD, USA"],"affiliations":[{"raw_affiliation_string":"Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD, USA","institution_ids":["https://openalex.org/I145311948"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.661,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":17,"citation_normalized_percentile":{"value":0.881774,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":91},"biblio":{"volume":null,"issue":null,"first_page":"6545","last_page":"6549"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9985,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.90282875},{"id":"https://openalex.org/keywords/modulation","display_name":"Modulation (music)","score":0.5547647},{"id":"https://openalex.org/keywords/cepstrum","display_name":"Cepstrum","score":0.4897921},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.48179072},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.453506}],"concepts":[{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.90282875},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7948582},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7448476},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.5827232},{"id":"https://openalex.org/C123079801","wikidata":"https://www.wikidata.org/wiki/Q750240","display_name":"Modulation (music)","level":2,"score":0.5547647},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5308156},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.52135384},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.51512897},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.50814486},{"id":"https://openalex.org/C88485024","wikidata":"https://www.wikidata.org/wiki/Q1054571","display_name":"Cepstrum","level":2,"score":0.4897921},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4841007},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.48179072},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.45517898},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.453506},{"id":"https://openalex.org/C11930861","wikidata":"https://www.wikidata.org/wiki/Q181417","display_name":"Frequency modulation","level":3,"score":0.42396826},{"id":"https://openalex.org/C74064498","wikidata":"https://www.wikidata.org/wiki/Q3396184","display_name":"Radio frequency","level":2,"score":0.1382244},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.11331555},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.096490085},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09450659},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8682710","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.5}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":16,"referenced_works":["https://openalex.org/W1524333225","https://openalex.org/W156237177","https://openalex.org/W1897037354","https://openalex.org/W1966264494","https://openalex.org/W1992272902","https://openalex.org/W2049123401","https://openalex.org/W2109622017","https://openalex.org/W2147676440","https://openalex.org/W2152483743","https://openalex.org/W2165712214","https://openalex.org/W2397147568","https://openalex.org/W2399383297","https://openalex.org/W2514966966","https://openalex.org/W2526425061","https://openalex.org/W2889508099","https://openalex.org/W2899640612"],"related_works":["https://openalex.org/W4385672897","https://openalex.org/W2387604097","https://openalex.org/W2373675101","https://openalex.org/W2359140082","https://openalex.org/W2353790262","https://openalex.org/W2100203012","https://openalex.org/W2018086531","https://openalex.org/W1980297060","https://openalex.org/W1556565948","https://openalex.org/W106160982"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,54],"propose":[4],"a":[5,57,72],"novel":[6],"method":[7],"to":[8],"capture":[9],"energy":[10],"modulations":[11],"from":[12],"different":[13,38],"frequency":[14],"bands":[15],"in":[16,28,37,59],"speech":[17],"into":[18],"frame-level":[19],"feature":[20],"vectors,":[21],"called":[22],"Modulation-vectors":[23],"or":[24],"M-vectors,":[25],"for":[26,44],"use":[27],"Automatic":[29],"Speech":[30],"Recognition":[31],"(ASR)":[32],"systems.":[33],"We":[34],"show":[35],"that":[36,70],"multi-stream":[39],"setups,":[40],"with":[41],"parallel":[42],"streams":[43],"M-vectors":[45],"and":[46,69,74,82],"the":[47,88],"popular":[48],"Mel-frequency":[49],"Cepstral":[50],"Coefficient":[51],"(MFCC)":[52],"features,":[53],"can":[55],"realize":[56],"boost":[58],"word":[60],"recognition":[61],"performance":[62],"of":[63,71],"end-to-end":[64],"systems":[65],"by":[66,79],"\u2248":[67,80,83],"5%,":[68],"monophone":[73],"triphone":[75],"HMM-GMM":[76],"ASR":[77],"system":[78],"18%":[81],"16%":[84],"respectively":[85],"over":[86],"using":[87],"traditional":[89],"MFCC":[90],"features.":[91]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2940111934","counts_by_year":[{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":1}],"updated_date":"2025-01-21T04:18:55.309605","created_date":"2019-04-25"}