{"id":"https://openalex.org/W4385938237","doi":"https://doi.org/10.1109/lsp.2023.3305912","title":"Long-Frame-Shift Neural Speech Phase Prediction With Spectral Continuity Enhancement and Interpolation Error Compensation","display_name":"Long-Frame-Shift Neural Speech Phase Prediction With Spectral Continuity Enhancement and Interpolation Error Compensation","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4385938237","doi":"https://doi.org/10.1109/lsp.2023.3305912"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2023.3305912","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2308.08850","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045907056","display_name":"Yang Ai","orcid":"https://orcid.org/0000-0001-6668-022X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Ai","raw_affiliation_strings":["National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072371384","display_name":"Ye-Xin Lu","orcid":"https://orcid.org/0009-0009-8026-0702"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ye-Xin Lu","raw_affiliation_strings":["National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059767940","display_name":"Zhen-Hua Ling","orcid":"https://orcid.org/0000-0001-7853-5273"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhen-Hua Ling","raw_affiliation_strings":["National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.352,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.606989,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":68,"max":79},"biblio":{"volume":"30","issue":null,"first_page":"1097","last_page":"1101"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9988,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9988,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9984,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9942,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpolation","display_name":"Interpolation","score":0.6437355},{"id":"https://openalex.org/keywords/decimation","display_name":"Decimation","score":0.47310305}],"concepts":[{"id":"https://openalex.org/C137800194","wikidata":"https://www.wikidata.org/wiki/Q11713455","display_name":"Interpolation (computer graphics)","level":3,"score":0.6437355},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5895994},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5385424},{"id":"https://openalex.org/C44280652","wikidata":"https://www.wikidata.org/wiki/Q104837","display_name":"Phase (matter)","level":2,"score":0.4863806},{"id":"https://openalex.org/C180205008","wikidata":"https://www.wikidata.org/wiki/Q159190","display_name":"Amplitude","level":2,"score":0.486064},{"id":"https://openalex.org/C173642442","wikidata":"https://www.wikidata.org/wiki/Q1253346","display_name":"Decimation","level":3,"score":0.47310305},{"id":"https://openalex.org/C166386157","wikidata":"https://www.wikidata.org/wiki/Q1477735","display_name":"Short-time Fourier transform","level":4,"score":0.4575457},{"id":"https://openalex.org/C4839761","wikidata":"https://www.wikidata.org/wiki/Q212111","display_name":"Spectral line","level":2,"score":0.44410327},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4327752},{"id":"https://openalex.org/C102519508","wikidata":"https://www.wikidata.org/wiki/Q6520159","display_name":"Fourier transform","level":2,"score":0.39152795},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.37346292},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35078204},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.342088},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.23372361},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.21899039},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.18338528},{"id":"https://openalex.org/C203024314","wikidata":"https://www.wikidata.org/wiki/Q1365258","display_name":"Fourier analysis","level":3,"score":0.1816217},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.17672902},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.17082319},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.07560882},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2023.3305912","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2308.08850","pdf_url":"https://arxiv.org/pdf/2308.08850","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2308.08850","pdf_url":"https://arxiv.org/pdf/2308.08850","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.68,"display_name":"Peace, justice, and strong institutions","id":"https://metadata.un.org/sdg/16"}],"grants":[{"funder":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities","award_id":"WK2100000033"}],"datasets":[],"versions":[],"referenced_works_count":19,"referenced_works":["https://openalex.org/W2120847449","https://openalex.org/W2152859600","https://openalex.org/W2749881488","https://openalex.org/W2874689226","https://openalex.org/W2902806197","https://openalex.org/W2962911378","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W2964328256","https://openalex.org/W2977305403","https://openalex.org/W2989528444","https://openalex.org/W3010751348","https://openalex.org/W3015213667","https://openalex.org/W3096709315","https://openalex.org/W3161748343","https://openalex.org/W4224918667","https://openalex.org/W4288594364","https://openalex.org/W4372260247","https://openalex.org/W4372348072"],"related_works":["https://openalex.org/W4313577963","https://openalex.org/W4225639054","https://openalex.org/W4200633555","https://openalex.org/W3029686646","https://openalex.org/W2340678901","https://openalex.org/W2115073044","https://openalex.org/W1967434260","https://openalex.org/W1526941731","https://openalex.org/W1513711368","https://openalex.org/W1508038409"],"abstract_inverted_index":{"Speech":[0],"phase":[1,19,27,34,70,80,123,137,162,172],"prediction,":[2],"which":[3,40,74],"is":[4],"a":[5,65],"significant":[6],"research":[7],"focus":[8],"in":[9,159],"the":[10,45,114,135,151,165],"field":[11],"of":[12,54,78,91],"signal":[13],"processing,":[14],"aims":[15],"to":[16,32,112,120],"recover":[17],"speech":[18,26,69],"spectra":[20,35,81,102,124,138,163],"from":[21,82,106,141],"amplitude-related":[22],"features.":[23],"However,":[24],"existing":[25],"prediction":[28,71,77,95],"methods":[29],"are":[30,41,103,139],"constrained":[31],"recovering":[33],"with":[36],"short":[37],"frame":[38],"shifts,":[39],"considerably":[42],"smaller":[43],"than":[44,164],"theoretical":[46],"upper":[47],"bound":[48],"required":[49],"for":[50,131],"exact":[51],"waveform":[52],"reconstruction":[53],"short-time":[55],"Fourier":[56],"transform":[57],"(STFT).":[58],"To":[59],"tackle":[60],"this":[61],"issue,":[62],"we":[63],"present":[64],"novel":[66],"long-frame-shift":[67,79,83,107,136,161],"neural":[68],"(LFS-NSPP)":[72],"method":[73,89,154],"enables":[75],"precise":[76],"log":[84,100],"amplitude":[85,101],"spectra.":[86],"The":[87,98],"proposed":[88,152],"consists":[90],"three":[92],"stages:":[93],"interpolation,":[94],"and":[96,117,169],"decimation.":[97,146],"short-frame-shift":[99,122,142],"first":[104],"constructed":[105],"ones":[108,143],"through":[109,144],"frequency-by-frequency":[110],"interpolation":[111,132],"enhance":[113],"spectral":[115],"continuity,":[116],"then":[118],"employed":[119],"predict":[121],"using":[125],"an":[126],"NSPP":[127,167],"model,":[128],"thereby":[129],"compensating":[130],"errors.":[133],"Ultimately,":[134],"obtained":[140],"frame-by-frame":[145],"Experimental":[147],"results":[148],"show":[149],"that":[150],"LFS-NSPP":[153],"can":[155],"yield":[156],"superior":[157],"quality":[158],"predicting":[160],"original":[166],"model":[168],"other":[170],"signal-processing-based":[171],"estimation":[173],"algorithms.":[174]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4385938237","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2024-12-10T08:48:05.714262","created_date":"2023-08-18"}