{"id":"https://openalex.org/W4319862214","doi":"https://doi.org/10.1109/slt54892.2023.10023199","title":"End-to-End Integration of Speech Recognition, Dereverberation, Beamforming, and Self-Supervised Learning Representation","display_name":"End-to-End Integration of Speech Recognition, Dereverberation, Beamforming, and Self-Supervised Learning Representation","publication_year":2023,"publication_date":"2023-01-09","ids":{"openalex":"https://openalex.org/W4319862214","doi":"https://doi.org/10.1109/slt54892.2023.10023199"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt54892.2023.10023199","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2210.10742","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042385500","display_name":"Yoshiki Masuyama","orcid":"https://orcid.org/0000-0002-5881-0474"},"institutions":[{"id":"https://openalex.org/I69740276","display_name":"Tokyo Metropolitan University","ror":"https://ror.org/00ws30h19","country_code":"JP","type":"education","lineage":["https://openalex.org/I69740276"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yoshiki Masuyama","raw_affiliation_strings":["Tokyo Metropolitan University, Japan"],"affiliations":[{"raw_affiliation_string":"Tokyo Metropolitan University, Japan","institution_ids":["https://openalex.org/I69740276"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050058892","display_name":"Xuankai Chang","orcid":"https://orcid.org/0000-0002-5221-5412"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xuankai Chang","raw_affiliation_strings":["Carnegie Mellon University, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047682990","display_name":"Samuele Cornell","orcid":"https://orcid.org/0000-0002-5358-1844"},"institutions":[{"id":"https://openalex.org/I122534668","display_name":"Marche Polytechnic University","ror":"https://ror.org/00x69rs40","country_code":"IT","type":"education","lineage":["https://openalex.org/I122534668"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Samuele Cornell","raw_affiliation_strings":["Università Politecnica delle Marche,Italy"],"affiliations":[{"raw_affiliation_string":"Università Politecnica delle Marche,Italy","institution_ids":["https://openalex.org/I122534668"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001291873","display_name":"Shinji Watanabe","orcid":"https://orcid.org/0000-0002-5970-8631"},"institutions":[{"id":"https://openalex.org/I69740276","display_name":"Tokyo Metropolitan University","ror":"https://ror.org/00ws30h19","country_code":"JP","type":"education","lineage":["https://openalex.org/I69740276"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shinji Watanabe","raw_affiliation_strings":["Tokyo Metropolitan University, Japan"],"affiliations":[{"raw_affiliation_string":"Tokyo Metropolitan University, Japan","institution_ids":["https://openalex.org/I69740276"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056281759","display_name":"Nobutaka Ono","orcid":"https://orcid.org/0000-0003-4242-2773"},"institutions":[{"id":"https://openalex.org/I69740276","display_name":"Tokyo Metropolitan University","ror":"https://ror.org/00ws30h19","country_code":"JP","type":"education","lineage":["https://openalex.org/I69740276"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Nobutaka Ono","raw_affiliation_strings":["Tokyo Metropolitan University, Japan"],"affiliations":[{"raw_affiliation_string":"Tokyo Metropolitan University, Japan","institution_ids":["https://openalex.org/I69740276"]}]}],"institution_assertions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.075,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.998274,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"260","last_page":"265"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9957,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.59545374},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.41434935},{"id":"https://openalex.org/keywords/representation","display_name":"Representation","score":0.41087168}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8087813},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.69913507},{"id":"https://openalex.org/C54197355","wikidata":"https://www.wikidata.org/wiki/Q5782992","display_name":"Beamforming","level":2,"score":0.67933184},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.59545374},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.55861974},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.44135532},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.43786317},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.41523296},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.41434935},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.41087168},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.40308627},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3934501},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.10403666},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.061543107},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt54892.2023.10023199","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2210.10742","pdf_url":"https://arxiv.org/pdf/2210.10742","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2210.10742","pdf_url":"https://arxiv.org/pdf/2210.10742","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, justice, and strong institutions","score":0.8}],"grants":[{"funder":"https://openalex.org/F4320320212","funder_display_name":"Japan Society for the Promotion of Science London","award_id":"JP21J21371"},{"funder":"https://openalex.org/F4320320907","funder_display_name":"Japan Science and Technology Corporation","award_id":"JPMJCR19A3"},{"funder":"https://openalex.org/F4320338075","funder_display_name":"Core Research for Evolutional Science and Technology","award_id":"JPMJCR19A3"}],"datasets":[],"versions":[],"referenced_works_count":47,"referenced_works":["https://openalex.org/W1828163288","https://openalex.org/W2127141656","https://openalex.org/W2148613904","https://openalex.org/W2160815625","https://openalex.org/W2164502538","https://openalex.org/W2168729028","https://openalex.org/W2242685705","https://openalex.org/W2327501763","https://openalex.org/W2398826216","https://openalex.org/W2559260703","https://openalex.org/W2640112133","https://openalex.org/W2766219058","https://openalex.org/W2884797218","https://openalex.org/W2905402910","https://openalex.org/W2944972166","https://openalex.org/W2954695182","https://openalex.org/W2962780374","https://openalex.org/W2962824709","https://openalex.org/W2962892438","https://openalex.org/W2963211739","https://openalex.org/W2972818416","https://openalex.org/W2995181338","https://openalex.org/W2998616931","https://openalex.org/W3007256793","https://openalex.org/W3008762051","https://openalex.org/W3032514799","https://openalex.org/W3096073522","https://openalex.org/W3097777922","https://openalex.org/W3119308075","https://openalex.org/W3144062511","https://openalex.org/W3163217847","https://openalex.org/W3197580070","https://openalex.org/W3198694222","https://openalex.org/W3205533980","https://openalex.org/W3207558756","https://openalex.org/W3209059054","https://openalex.org/W3209376089","https://openalex.org/W3209490467","https://openalex.org/W3209984917","https://openalex.org/W3217643883","https://openalex.org/W4210402803","https://openalex.org/W4226390724","https://openalex.org/W4281492411","https://openalex.org/W4285250921","https://openalex.org/W4296068785","https://openalex.org/W4297841651","https://openalex.org/W854541894"],"related_works":["https://openalex.org/W818226659","https://openalex.org/W4311414679","https://openalex.org/W3179968364","https://openalex.org/W2963170046","https://openalex.org/W2923631784","https://openalex.org/W2401089611","https://openalex.org/W2376244802","https://openalex.org/W2158075901","https://openalex.org/W2151749779","https://openalex.org/W1999612375"],"abstract_inverted_index":{"Self-supervised":[0],"learning":[1],"representation":[2],"(SSLR)":[3],"has":[4],"demonstrated":[5],"its":[6],"significant":[7],"effectiveness":[8,119],"in":[9,33,72],"automatic":[10],"speech":[11,29],"recognition":[12],"(ASR),":[13],"mainly":[14],"with":[15,27,44,80,101],"clean":[16],"speech.":[17],"Recent":[18],"work":[19],"pointed":[20],"out":[21],"the":[22,68,73,76,89,98,102,124],"strength":[23],"of":[24,86],"integrating":[25,54],"SSLR":[26,92],"single-channel":[28],"enhancement":[30],"for":[31],"ASR":[32,59],"noisy":[34],"environments.":[35],"This":[36],"paper":[37],"further":[38],"advances":[39],"this":[40],"integration":[41,100],"by":[42,53,96],"dealing":[43],"multi-channel":[45],"input.":[46],"We":[47],"propose":[48],"a":[49,61,81],"novel":[50],"end-to-end":[51,99],"architecture":[52],"dereverberation,":[55],"beamforming,":[56],"SSLR,":[57],"and":[58,113],"within":[60],"single":[62],"neural":[63],"network.":[64],"Our":[65],"system":[66],"achieves":[67],"best":[69],"performance":[70],"reported":[71],"literature":[74],"on":[75,123],"CHiME-4":[77],"6-channel":[78],"track":[79],"word":[82],"error":[83],"rate":[84],"(WER)":[85],"1.77%.":[87],"While":[88],"WavLM-based":[90],"strong":[91],"demonstrates":[93],"promising":[94],"results":[95],"itself,":[97],"weighted":[103],"power":[104],"minimization":[105],"distortionless":[106],"response":[107],"beamformer,":[108],"which":[109],"simultaneously":[110],"performs":[111],"dereverberation":[112],"denoising,":[114],"improves":[115],"WER":[116],"significantly.":[117],"Its":[118],"is":[120],"also":[121],"validated":[122],"REVERB":[125],"dataset.":[126]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4319862214","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":6}],"updated_date":"2025-01-02T11:23:02.200097","created_date":"2023-02-11"}