{"id":"https://openalex.org/W4404570730","doi":"https://doi.org/10.48550/arxiv.2411.11123","title":"Pitch-and-Spectrum-Aware Singing Quality Assessment with Bias Correction\n and Model Fusion","display_name":"Pitch-and-Spectrum-Aware Singing Quality Assessment with Bias Correction\n and Model Fusion","publication_year":2024,"publication_date":"2024-11-17","ids":{"openalex":"https://openalex.org/W4404570730","doi":"https://doi.org/10.48550/arxiv.2411.11123"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.11123","pdf_url":"http://arxiv.org/pdf/2411.11123","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2411.11123","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089019283","display_name":"Yufei Shi","orcid":"https://orcid.org/0000-0002-6999-0191"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Yu-Fei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045907056","display_name":"Yang Ai","orcid":"https://orcid.org/0000-0001-6668-022X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ai, Yang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072371384","display_name":"Ye-Xin Lu","orcid":"https://orcid.org/0009-0009-8026-0702"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Ye-Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067982618","display_name":"Hui-Peng Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Hui-Peng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5059767940","display_name":"Zhen-Hua Ling","orcid":"https://orcid.org/0000-0001-7853-5273"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ling, Zhen-Hua","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9926,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9926,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9914,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10863","display_name":"Voice and Speech Disorders","score":0.9798,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.8026453},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.55016017},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5110956},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.49314255},{"id":"https://openalex.org/C156778621","wikidata":"https://www.wikidata.org/wiki/Q1365748","display_name":"Spectrum (functional analysis)","level":2,"score":0.4925781},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.49236357},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.33239996},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.30511266},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.1364286},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.09823963},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.11123","pdf_url":"http://arxiv.org/pdf/2411.11123","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.11123","pdf_url":"http://arxiv.org/pdf/2411.11123","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2390529913","https://openalex.org/W2387733758","https://openalex.org/W2376664795","https://openalex.org/W2372249404","https://openalex.org/W2368036937","https://openalex.org/W2367547137","https://openalex.org/W2366077683","https://openalex.org/W2354994102","https://openalex.org/W2142368101","https://openalex.org/W1501596003"],"abstract_inverted_index":{"We":[0],"participated":[1],"in":[2],"track":[3],"2":[4],"of":[5,19],"the":[6,14,25,33,60,83],"VoiceMOS":[7],"Challenge":[8],"2024,":[9],"which":[10,71],"aimed":[11],"to":[12,90,104],"predict":[13],"mean":[15],"opinion":[16],"score":[17],"(MOS)":[18],"singing":[20,66],"samples.":[21],"Our":[22],"submission":[23,43],"secured":[24],"first":[26],"place":[27],"among":[28],"all":[29,118,122],"participating":[30],"teams,":[31],"excluding":[32],"official":[34],"baseline.":[35],"In":[36],"this":[37],"paper,":[38],"we":[39],"further":[40,105],"improve":[41],"our":[42,113],"and":[44,68,77,99],"propose":[45],"a":[46,86],"novel":[47],"Pitch-and-Spectrum-aware":[48],"Singing":[49],"Quality":[50],"Assessment":[51],"(PS-SQA)":[52],"method.":[53],"The":[54],"PS-SQA":[55,84,115],"is":[56],"designed":[57],"based":[58],"on":[59],"self-supervised-learning":[61],"(SSL)":[62],"MOS":[63],"predictor,":[64],"incorporating":[65],"pitch":[67,75],"spectral":[69],"information,":[70],"are":[72],"extracted":[73],"using":[74],"histogram":[76],"non-quantized":[78],"neural":[79],"codec,":[80],"respectively.":[81],"Additionally,":[82],"introduces":[85],"bias":[87],"correction":[88],"strategy":[89],"address":[91],"prediction":[92,107],"biases":[93],"caused":[94],"by":[95],"low-resource":[96],"training":[97],"samples,":[98],"employs":[100],"model":[101],"fusion":[102],"technology":[103],"enhance":[106],"accuracy.":[108],"Experimental":[109],"results":[110],"confirm":[111],"that":[112],"proposed":[114],"significantly":[116],"outperforms":[117],"competing":[119],"systems":[120],"across":[121],"system-level":[123],"metrics,":[124],"confirming":[125],"its":[126],"strong":[127],"sing":[128],"quality":[129],"assessment":[130],"capabilities.":[131]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4404570730","counts_by_year":[],"updated_date":"2024-12-13T07:14:51.924578","created_date":"2024-11-21"}