{"id":"https://openalex.org/W4403964500","doi":"https://doi.org/10.48550/arxiv.2410.04990","title":"Stage-Wise and Prior-Aware Neural Speech Phase Prediction","display_name":"Stage-Wise and Prior-Aware Neural Speech Phase Prediction","publication_year":2024,"publication_date":"2024-10-07","ids":{"openalex":"https://openalex.org/W4403964500","doi":"https://doi.org/10.48550/arxiv.2410.04990"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.04990","pdf_url":"http://arxiv.org/pdf/2410.04990","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2410.04990","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115602455","display_name":"Fei Liu","orcid":"https://orcid.org/0000-0002-9814-4226"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Fei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045907056","display_name":"Yang Ai","orcid":"https://orcid.org/0000-0001-6668-022X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ai, Yang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067982618","display_name":"Hui-Peng Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Hui-Peng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072371384","display_name":"Ye-Xin Lu","orcid":"https://orcid.org/0009-0009-8026-0702"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Ye-Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066498315","display_name":"Rui-Chen Zheng","orcid":"https://orcid.org/0009-0000-8074-9553"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Rui-Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5059767940","display_name":"Zhen-Hua Ling","orcid":"https://orcid.org/0000-0001-7853-5273"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ling, Zhen-Hua","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9975,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9975,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.975,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9739,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C146357865","wikidata":"https://www.wikidata.org/wiki/Q1123245","display_name":"Stage (stratigraphy)","level":2,"score":0.5383213},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.48048344},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.46037182},{"id":"https://openalex.org/C44280652","wikidata":"https://www.wikidata.org/wiki/Q104837","display_name":"Phase (matter)","level":2,"score":0.42118445},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3382176},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.1599712},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.11044937},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.04990","pdf_url":"http://arxiv.org/pdf/2410.04990","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.04990","pdf_url":"http://arxiv.org/pdf/2410.04990","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4396701345","https://openalex.org/W4396696052","https://openalex.org/W4391913857","https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2001405890"],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"a":[3,35,53,81,98],"novel":[4],"Stage-wise":[5],"and":[6,74,135],"Prior-aware":[7],"Neural":[8],"Speech":[9],"Phase":[10],"Prediction":[11],"(SP-NSPP)":[12],"model,":[13],"which":[14],"predicts":[15],"the":[16,28,41,49,60,72,89,92,105,120,131],"phase":[17,38,56,82,117,125,133,142],"spectrum":[18,22,39,51,57,83],"from":[19,40],"input":[20],"amplitude":[21,42,50],"by":[23,78],"two-stage":[24],"neural":[25,114],"networks.":[26],"In":[27],"initial":[29],"prior-construction":[30],"stage,":[31],"we":[32,95],"preliminarily":[33],"predict":[34],"rough":[36],"prior":[37,61],"spectrum.":[43],"The":[44],"subsequent":[45],"refinement":[46,106],"stage":[47],"transforms":[48],"into":[52],"refined":[54,93],"high-quality":[55],"conditioned":[58],"on":[59],"phase.":[62],"Networks":[63],"in":[64,104,157],"both":[65],"stages":[66],"use":[67],"ConvNeXt":[68],"v2":[69],"blocks":[70],"as":[71],"backbone":[73],"adopt":[75],"adversarial":[76],"training":[77,137],"innovatively":[79],"introducing":[80,130],"discriminator":[84],"(PSD).":[85],"To":[86],"further":[87],"improve":[88],"continuity":[90],"of":[91,153],"phase,":[94],"also":[96],"incorporate":[97],"time-frequency":[99],"integrated":[100],"difference":[101],"(TFID)":[102],"loss":[103],"stage.":[107],"Experimental":[108],"results":[109],"confirm":[110],"that,":[111],"compared":[112],"to":[113,129,140],"network-based":[115],"no-prior":[116],"prediction":[118,126],"methods,":[119],"proposed":[121,146],"SP-NSPP":[122,147],"achieves":[123],"higher":[124,158],"accuracy,":[127],"thanks":[128],"coarse":[132],"priors":[134],"diverse":[136],"criteria.":[138],"Compared":[139],"iterative":[141],"estimation":[143],"algorithms,":[144],"our":[145],"does":[148],"not":[149],"require":[150],"multiple":[151],"rounds":[152],"staged":[154],"iterations,":[155],"resulting":[156],"generation":[159],"efficiency.":[160]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403964500","counts_by_year":[],"updated_date":"2024-12-24T01:57:08.210614","created_date":"2024-11-01"}