{"id":"https://openalex.org/W4390897490","doi":"https://doi.org/10.48550/arxiv.2401.06387","title":"Towards High-Quality and Efficient Speech Bandwidth Extension with Parallel Amplitude and Phase Prediction","display_name":"Towards High-Quality and Efficient Speech Bandwidth Extension with Parallel Amplitude and Phase Prediction","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4390897490","doi":"https://doi.org/10.48550/arxiv.2401.06387"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2401.06387","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2401.06387","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072371384","display_name":"Ye-Xin Lu","orcid":"https://orcid.org/0009-0009-8026-0702"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Ye-Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045907056","display_name":"Yang Ai","orcid":"https://orcid.org/0000-0001-6668-022X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ai, Yang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067982618","display_name":"Hui-Peng Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Hui-Peng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5059767940","display_name":"Zhen-Hua Ling","orcid":"https://orcid.org/0000-0001-7853-5273"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ling, Zhen-Hua","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.999514,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":84,"max":92},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9897,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9687,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminator","display_name":"Discriminator","score":0.78808653},{"id":"https://openalex.org/keywords/narrowband","display_name":"Narrowband","score":0.5581739},{"id":"https://openalex.org/keywords/bandwidth-extension","display_name":"Bandwidth extension","score":0.553876}],"concepts":[{"id":"https://openalex.org/C2779803651","wikidata":"https://www.wikidata.org/wiki/Q5282088","display_name":"Discriminator","level":3,"score":0.78808653},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.74549717},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.5899538},{"id":"https://openalex.org/C2776096036","wikidata":"https://www.wikidata.org/wiki/Q1140483","display_name":"Narrowband","level":2,"score":0.5581739},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.5573536},{"id":"https://openalex.org/C9387945","wikidata":"https://www.wikidata.org/wiki/Q4854770","display_name":"Bandwidth extension","level":4,"score":0.553876},{"id":"https://openalex.org/C180205008","wikidata":"https://www.wikidata.org/wiki/Q159190","display_name":"Amplitude","level":2,"score":0.49312422},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4915236},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33793992},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.15958118},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.15332273},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.107325256},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0953058},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2401.06387","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.06387","pdf_url":"http://arxiv.org/pdf/2401.06387","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2401.06387","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2401.06387","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.64}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4319862422","https://openalex.org/W4299414243","https://openalex.org/W3142616080","https://openalex.org/W3135807828","https://openalex.org/W2826145399","https://openalex.org/W2790274877","https://openalex.org/W2400697507","https://openalex.org/W2147611552","https://openalex.org/W2132462584","https://openalex.org/W2114712585"],"abstract_inverted_index":{"Speech":[0],"bandwidth":[1,9],"extension":[2,215],"(BWE)":[3],"refers":[4],"to":[5,166,204,211],"widening":[6],"the":[7,15,74,78,88,92,101,104,114,128,167,173,209,213,217,226],"frequency":[8],"range":[10],"of":[11,36,103,121,144,153,162,216,228],"speech":[12,16,50,106,145],"signals,":[13,107],"enhancing":[14],"quality":[17,146],"towards":[18],"brighter":[19],"and":[20,38,47,77,85,96,117,124,157,170,193],"fuller.":[21],"This":[22],"paper":[23],"proposes":[24],"a":[25,67,110,119,188,200],"generative":[26],"adversarial":[27],"network":[28],"(GAN)":[29],"based":[30,59],"BWE":[31,148,230],"model":[32],"with":[33,70,82],"parallel":[34],"prediction":[35],"Amplitude":[37],"Phase":[39],"spectra,":[40],"named":[41],"AP-BWE,":[42],"which":[43,221],"achieves":[44,139],"both":[45,154],"high-quality":[46],"efficient":[48],"wideband":[49],"waveform":[51,115,180],"generation.":[52],"The":[53],"proposed":[54,137,174],"AP-BWE":[55,138,175,207],"generator":[56],"is":[57,208,222],"entirely":[58],"on":[60,187,199],"convolutional":[61],"neural":[62],"networks":[63],"(CNNs).":[64],"It":[65],"features":[66],"dual-stream":[68],"architecture":[69,169],"mutual":[71],"interaction,":[72],"where":[73],"amplitude":[75,95,123],"stream":[76,80],"phase":[79,97,125,219],"communicate":[81],"each":[83],"other":[84],"respectively":[86],"extend":[87],"high-frequency":[89,218],"components":[90],"from":[91],"input":[93],"narrowband":[94],"spectra.":[98],"To":[99],"improve":[100],"naturalness":[102],"extended":[105],"we":[108],"employ":[109],"multi-period":[111],"discriminator":[112],"at":[113,127],"level":[116],"design":[118],"pair":[120],"multi-resolution":[122],"discriminators":[126],"spectral":[129],"level,":[130],"respectively.":[131],"Experimental":[132],"results":[133],"demonstrate":[134],"that":[135],"our":[136,205],"state-of-the-art":[140],"performance":[141],"in":[142],"terms":[143,161],"for":[147,224],"tasks":[149],"targeting":[150],"sampling":[151],"rates":[152],"16":[155],"kHz":[156,179],"48":[158,178],"kHz.":[159],"In":[160],"generation":[163],"efficiency,":[164],"due":[165],"all-convolutional":[168],"all-frame-level":[171],"operations,":[172],"can":[176],"generate":[177],"samples":[181],"292.3":[182],"times":[183,195],"faster":[184,196],"than":[185,197],"real-time":[186,198],"single":[189,201],"RTX":[190],"4090":[191],"GPU":[192],"18.1":[194],"CPU.":[202],"Notably,":[203],"knowledge,":[206],"first":[210],"achieve":[212],"direct":[214],"spectrum,":[220],"beneficial":[223],"improving":[225],"effectiveness":[227],"existing":[229],"methods.":[231]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4390897490","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-01-06T06:35:31.035408","created_date":"2024-01-16"}