{"id":"https://openalex.org/W4221142789","doi":"https://doi.org/10.1109/taslp.2022.3190717","title":"CampNet: Context-Aware Mask Prediction for End-to-End Text-Based Speech Editing","display_name":"CampNet: Context-Aware Mask Prediction for End-to-End Text-Based Speech Editing","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4221142789","doi":"https://doi.org/10.1109/taslp.2022.3190717"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3190717","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2202.09950","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071038184","display_name":"Tao Wang","orcid":"https://orcid.org/0000-0003-1490-6973"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Wang","raw_affiliation_strings":["National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Science, Beijing, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038","https://openalex.org/I4210100255"]},{"raw_affiliation_string":"National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Science, Beijing, China","institution_ids":["https://openalex.org/I4210112150"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078525423","display_name":"Jiangyan Yi","orcid":"https://orcid.org/0000-0003-2422-4618"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiangyan Yi","raw_affiliation_strings":["National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073918837","display_name":"Ruibo Fu","orcid":"https://orcid.org/0000-0001-9598-1881"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruibo Fu","raw_affiliation_strings":["National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112613657","display_name":"Jianhua Tao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianhua Tao","raw_affiliation_strings":["National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111223590","display_name":"Zhengqi Wen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhengqi Wen","raw_affiliation_strings":["National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.811,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.99997,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":83,"max":85},"biblio":{"volume":"30","issue":null,"first_page":"2241","last_page":"2254"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.999,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-error","display_name":"Speech error","score":0.4183244}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8374374},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.76356244},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.64315003},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5850794},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5259526},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.49815893},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.47527876},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.44274214},{"id":"https://openalex.org/C541956065","wikidata":"https://www.wikidata.org/wiki/Q2250680","display_name":"Speech error","level":3,"score":0.4183244},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38952434},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38949823},{"id":"https://openalex.org/C43617652","wikidata":"https://www.wikidata.org/wiki/Q7575399","display_name":"Speech production","level":2,"score":0.34329852},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.109825045},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3190717","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2202.09950","pdf_url":"http://arxiv.org/pdf/2202.09950","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2202.09950","pdf_url":"http://arxiv.org/pdf/2202.09950","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"display_name":"Peace, justice, and strong institutions","score":0.66,"id":"https://metadata.un.org/sdg/16"}],"grants":[{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"61901473"},{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"61831022"},{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"62101553"}],"datasets":[],"versions":[],"referenced_works_count":33,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1980618402","https://openalex.org/W2095705004","https://openalex.org/W2106560396","https://openalex.org/W2107860279","https://openalex.org/W2152205330","https://openalex.org/W2299257856","https://openalex.org/W2327678559","https://openalex.org/W2428180336","https://openalex.org/W2471520273","https://openalex.org/W2518172956","https://openalex.org/W2519091744","https://openalex.org/W2576309025","https://openalex.org/W2584329820","https://openalex.org/W2598638573","https://openalex.org/W2737697117","https://openalex.org/W2803229097","https://openalex.org/W2807961574","https://openalex.org/W2808706139","https://openalex.org/W2903739847","https://openalex.org/W2963091184","https://openalex.org/W2963300588","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W2970006822","https://openalex.org/W2972359262","https://openalex.org/W3015338123","https://openalex.org/W3096159803","https://openalex.org/W3098557217","https://openalex.org/W3104557543","https://openalex.org/W3160438760","https://openalex.org/W4225596771","https://openalex.org/W4240592325"],"related_works":["https://openalex.org/W652196294","https://openalex.org/W3149582125","https://openalex.org/W2465421051","https://openalex.org/W2368700418","https://openalex.org/W2169632867","https://openalex.org/W2164483251","https://openalex.org/W1984347656","https://openalex.org/W1965141925","https://openalex.org/W1927421023","https://openalex.org/W10581632"],"abstract_inverted_index":{"The":[0,96,235],"text-based":[1,86,101,149,155],"speech":[2,8,34,87,102,110,120,134,150,171,177,198,232,246,261,317],"editor":[3],"allows":[4],"the":[5,19,25,60,65,76,100,114,119,128,133,137,141,145,176,197,218,260,291,294,301,314],"editing":[6,22,88,103,262,318],"of":[7,21,28,67,109,148,170,199,220,244,279,293,316],"through":[9],"intuitive":[10],"cutting,":[11],"copying,":[12],"and":[13,70,111,131,162,184,216,237,255,274,281],"pasting":[14],"operations":[15,156,165],"to":[16,39,49,53,136,174,179,289],"speed":[17],"up":[18],"process":[20,104],"speech.":[23],"However,":[24],"major":[26],"drawback":[27],"current":[29],"systems":[30],"is":[31,45,192],"that":[32,259,304],"edited":[33,129],"often":[35,63],"sounds":[36],"unnatural":[37,125],"due":[38],"cut-copy-paste":[40],"operation.":[41],"In":[42],"addition,":[43],"it":[44],"not":[46,57],"obvious":[47],"how":[48],"synthesize":[50,132,175,196],"records":[51],"according":[52],"a":[54,83,187,206,228],"new":[55,229],"word":[56],"appearing":[58],"in":[59,127,140,182],"transcript,":[61],"which":[62,194,226],"needs":[64],"help":[66],"text-to-speech":[68],"(TTS)":[69],"voice":[71],"conversion":[72],"(VC)":[73],"technology":[74],"at":[75,250],"same":[77],"time.":[78],"This":[79],"paper":[80],"first":[81],"proposes":[82],"novel":[84],"end-to-end":[85],"method":[89,191,209,276],"called":[90],"context-aware":[91],"mask":[92],"prediction":[93],"network":[94],"(CampNet).":[95],"model":[97],"can":[98,123,166,195,247,311],"simulate":[99],"by":[105,117],"randomly":[106],"masking":[107],"part":[108],"then":[112],"predicting":[113],"masked":[115],"region":[116,130],"sensing":[118],"context.":[121],"It":[122],"solve":[124],"prosody":[126],"corresponding":[135,178],"unseen":[138],"words":[139],"transcript.":[142],"Secondly,":[143],"for":[144,214,231,319],"possible":[146],"operation":[147],"editing,":[151,273],"we":[152,204],"design":[153],"three":[154],"based":[157,223,264],"on":[158,224,253,265,297],"CampNet:":[159],"deletion,":[160],"insertion,":[161],"replacement.":[163],"These":[164],"cover":[167],"various":[168],"situations":[169],"editing.":[172],"Thirdly,":[173],"long":[180],"text":[181],"insertion":[183],"replacement":[185],"operations,":[186],"word-level":[188],"autoregressive":[189],"generation":[190],"proposed,":[193],"arbitrary":[200],"length":[201],"text.":[202],"Fourthly,":[203],"propose":[205],"speaker":[207,305],"adaptation":[208,306],"using":[210],"only":[211,308],"one":[212,309],"sentence":[213,310],"CampNet":[215,266,295],"explore":[217,290],"ability":[219],"few-shot":[221],"learning":[222],"CampNet,":[225],"provides":[227],"idea":[230],"forgery":[233],"tasks.":[234],"subjective":[236],"objective":[238],"experiments1":[242],"Examples":[243],"generated":[245],"be":[248],"found":[249],"https://hairuo55.github.io/CampNet.":[251],"":[252],"VCTK":[254],"LibriTTS":[256],"datasets":[257],"show":[258],"results":[263],"are":[267],"better":[268],"than":[269],"TTS":[270,280],"technology,":[271],"manual":[272],"VoCo":[275],"(the":[277],"combination":[278],"VC).":[282],"We":[283],"also":[284],"conduct":[285],"detailed":[286],"ablation":[287],"experiments":[288],"effect":[292],"structure":[296],"its":[298],"performance.":[299],"Finally,":[300],"experiment":[302],"shows":[303],"with":[307],"further":[312],"improve":[313],"naturalness":[315],"one-shot":[320],"learning.":[321]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4221142789","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2}],"updated_date":"2024-12-11T08:01:55.872780","created_date":"2022-04-03"}