{"id":"https://openalex.org/W4386148299","doi":"https://doi.org/10.48550/arxiv.2308.11940","title":"Audio Generation with Multiple Conditional Diffusion Model","display_name":"Audio Generation with Multiple Conditional Diffusion Model","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4386148299","doi":"https://doi.org/10.48550/arxiv.2308.11940"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2308.11940","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2308.11940","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022081273","display_name":"Zhifang Guo","orcid":"https://orcid.org/0000-0002-3728-6319"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Zhifang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083639544","display_name":"Jianguo Mao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mao, Jianguo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100600683","display_name":"Rui Tao","orcid":"https://orcid.org/0000-0001-6333-513X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao, Rui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017782552","display_name":"Long Yan","orcid":"https://orcid.org/0000-0002-2815-1332"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Long","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017058806","display_name":"Kazushige Ouchi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ouchi, Kazushige","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100410286","display_name":"Hong Liu","orcid":"https://orcid.org/0000-0002-0896-8409"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Hong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100402345","display_name":"Xiangdong Wang","orcid":"https://orcid.org/0000-0002-4226-3250"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xiangdong","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":65},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9989,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9989,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9861,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9826,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fuse","display_name":"Fuse (electrical)","score":0.4790248}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8448565},{"id":"https://openalex.org/C113954288","wikidata":"https://www.wikidata.org/wiki/Q186885","display_name":"Timestamp","level":2,"score":0.8438134},{"id":"https://openalex.org/C48209547","wikidata":"https://www.wikidata.org/wiki/Q1331104","display_name":"Controllability","level":2,"score":0.6855275},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5136061},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.48733053},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.4790248},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.43786177},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42081344},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.110556036},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2308.11940","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2308.11940","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2308.11940","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4239246781","https://openalex.org/W4205698120","https://openalex.org/W3201620972","https://openalex.org/W2561315646","https://openalex.org/W2542825942","https://openalex.org/W2332386680","https://openalex.org/W2248621902","https://openalex.org/W2164760767","https://openalex.org/W2036697162","https://openalex.org/W2003779889"],"abstract_inverted_index":{"Text-based":[0],"audio":[1,138,167],"generation":[2],"models":[3,41],"have":[4],"limitations":[5],"as":[6,56],"they":[7],"cannot":[8],"encompass":[9],"all":[10],"the":[11,35,59,67,78,105,110,113,120,137,151],"information":[12],"in":[13],"audio,":[14],"leading":[15],"to":[16,58,101,119,149,164],"restricted":[17],"controllability":[18,36,152],"when":[19],"relying":[20],"solely":[21],"on":[22],"text.":[23,60],"To":[24,76],"address":[25],"this":[26],"issue,":[27],"we":[28,82,128],"propose":[29],"a":[30,84,93,98,133,144],"novel":[31],"model":[32,96,116,159],"that":[33,89,157],"enhances":[34],"of":[37,73,80,112,122,146],"existing":[38,130],"pre-trained":[39,114],"text-to-audio":[40,115],"by":[42,92],"incorporating":[43],"additional":[44,106],"conditions":[45,107,141],"including":[46],"content":[47],"(timestamp)":[48],"and":[49,53,71,97,103,125,139,142,171],"style":[50],"(pitch":[51],"contour":[52],"energy":[54,72],"contour)":[55],"supplements":[57],"This":[61],"approach":[62],"achieves":[63,161],"fine-grained":[64,162],"control":[65,86,163],"over":[66],"temporal":[68],"order,":[69],"pitch,":[70],"generated":[74],"audio.":[75],"preserve":[77],"diversity":[79],"generation,":[81],"employ":[83],"trainable":[85,99],"condition":[87],"encoder":[88],"is":[90],"enhanced":[91],"large":[94],"language":[95],"Fusion-Net":[100],"encode":[102],"fuse":[104],"while":[108],"keeping":[109],"weights":[111],"frozen.":[117],"Due":[118],"lack":[121],"suitable":[123],"datasets":[124,131],"evaluation":[126,147],"metrics,":[127],"consolidate":[129],"into":[132],"new":[134],"dataset":[135,173],"comprising":[136],"corresponding":[140],"use":[143],"series":[145],"metrics":[148],"evaluate":[150],"performance.":[153],"Experimental":[154],"results":[155],"demonstrate":[156],"our":[158,172],"successfully":[160],"accomplish":[165],"controllable":[166],"generation.":[168],"Audio":[169],"samples":[170],"are":[174],"publicly":[175],"available":[176],"at":[177],"https://conditionaudiogen.github.io/conditionaudiogen/":[178]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4386148299","counts_by_year":[],"updated_date":"2025-04-14T13:42:09.570090","created_date":"2023-08-25"}