{"id":"https://openalex.org/W4399418292","doi":"https://doi.org/10.48550/arxiv.2406.02250","title":"Multi-Stage Speech Bandwidth Extension with Flexible Sampling Rate\n Control","display_name":"Multi-Stage Speech Bandwidth Extension with Flexible Sampling Rate\n Control","publication_year":2024,"publication_date":"2024-06-04","ids":{"openalex":"https://openalex.org/W4399418292","doi":"https://doi.org/10.48550/arxiv.2406.02250"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2406.02250","pdf_url":"https://arxiv.org/pdf/2406.02250","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2406.02250","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072371384","display_name":"Ye-Xin Lu","orcid":"https://orcid.org/0009-0009-8026-0702"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Ye-Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045907056","display_name":"Yang Ai","orcid":"https://orcid.org/0000-0001-6668-022X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ai, Yang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004434747","display_name":"Zheng-Yan Sheng","orcid":"https://orcid.org/0009-0002-0638-5530"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sheng, Zheng-Yan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5059767940","display_name":"Zhen-Hua Ling","orcid":"https://orcid.org/0000-0001-7853-5273"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ling, Zhen-Hua","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":83},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9923,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9923,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9853,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9429,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bandwidth-extension","display_name":"Bandwidth extension","score":0.4760556}],"concepts":[{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.5751346},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.5640879},{"id":"https://openalex.org/C146357865","wikidata":"https://www.wikidata.org/wiki/Q1123245","display_name":"Stage (stratigraphy)","level":2,"score":0.5307968},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.49563277},{"id":"https://openalex.org/C9387945","wikidata":"https://www.wikidata.org/wiki/Q4854770","display_name":"Bandwidth extension","level":4,"score":0.4760556},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.4410177},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.430481},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.41687176},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.23018894},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.20912877},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.18311355},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.07681954},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.0},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2406.02250","pdf_url":"https://arxiv.org/pdf/2406.02250","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2406.02250","pdf_url":"https://arxiv.org/pdf/2406.02250","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4319862422","https://openalex.org/W3208832629","https://openalex.org/W3207737697","https://openalex.org/W3144767625","https://openalex.org/W2991225285","https://openalex.org/W2595254936","https://openalex.org/W2059847929","https://openalex.org/W2017964352","https://openalex.org/W1999397223","https://openalex.org/W1971598839"],"abstract_inverted_index":{"The":[0,58,90],"majority":[1],"of":[2,13,44,55,65,126],"existing":[3],"speech":[4,34,84,114,118],"bandwidth":[5],"extension":[6],"(BWE)":[7],"methods":[8,116],"operate":[9],"under":[10],"the":[11,83,97,123],"constraint":[12],"fixed":[14],"source":[15,45],"and":[16,46,51,78,101,137],"target":[17,47],"sampling":[18,48],"rates,":[19],"which":[20,39],"limits":[21],"their":[22],"flexibility":[23],"in":[24,117],"practical":[25],"applications.":[26],"In":[27],"this":[28],"paper,":[29],"we":[30],"propose":[31],"a":[32,42,63,72],"multi-stage":[33],"BWE":[35,66,115],"model":[36,61],"named":[37],"MS-BWE,":[38],"can":[40,128],"handle":[41],"set":[43],"rate":[49],"pairs":[50],"achieve":[52,129],"flexible":[53],"extensions":[54],"frequency":[56,85],"bandwidth.":[57],"proposed":[59,108],"MS-BWE":[60,109,127],"comprises":[62],"cascade":[64],"blocks,":[67],"with":[68],"each":[69],"block":[70],"featuring":[71],"dual-stream":[73],"architecture":[74],"to":[75,95,112],"realize":[76],"amplitude":[77],"phase":[79],"extension,":[80],"progressively":[81],"painting":[82],"bands":[86],"stage":[87],"by":[88],"stage.":[89],"teacher-forcing":[91],"strategy":[92],"is":[93,110],"employed":[94],"mitigate":[96],"discrepancy":[98],"between":[99],"training":[100],"inference.":[102],"Experimental":[103],"results":[104],"demonstrate":[105],"that":[106],"our":[107],"comparable":[111],"state-of-the-art":[113],"quality.":[119],"Regarding":[120],"generation":[121,125],"efficiency,":[122],"one-stage":[124],"over":[130],"one":[131],"thousand":[132],"times":[133,140],"real-time":[134],"on":[135,141],"GPU":[136],"about":[138],"sixty":[139],"CPU.":[142]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4399418292","counts_by_year":[],"updated_date":"2025-01-17T05:04:02.257413","created_date":"2024-06-08"}