{"id":"https://openalex.org/W3163658555","doi":"https://doi.org/10.1109/tcsvt.2021.3079897","title":"Towards an End-to-End Visual-to-Raw-Audio Generation With GAN","display_name":"Towards an End-to-End Visual-to-Raw-Audio Generation With GAN","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3163658555","doi":"https://doi.org/10.1109/tcsvt.2021.3079897","mag":"3163658555"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2021.3079897","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022634842","display_name":"Shiguang Liu","orcid":"https://orcid.org/0000-0003-2353-5318"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiguang Liu","raw_affiliation_strings":["School of Computer Science and Technology, Division of Intelligence and Computing, Tianjin University, Tianjin, China","Tianjin Key Laboratory of Cognitive Computing and Application, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Division of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":[]},{"raw_affiliation_string":"Tianjin Key Laboratory of Cognitive Computing and Application, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100698035","display_name":"Sijia Li","orcid":"https://orcid.org/0000-0002-2244-7327"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sijia Li","raw_affiliation_strings":["Division of Intelligence and Computing, School of Computer Science and Technology, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Division of Intelligence and Computing, School of Computer Science and Technology, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065017752","display_name":"Haonan Cheng","orcid":"https://orcid.org/0000-0003-3407-4318"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haonan Cheng","raw_affiliation_strings":["Division of Intelligence and Computing, School of Computer Science and Technology, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Division of Intelligence and Computing, School of Computer Science and Technology, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.474,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.767734,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":92,"max":93},"biblio":{"volume":"32","issue":"3","first_page":"1299","last_page":"1312"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Audio Signal Classification and Analysis","score":0.9967,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Audio Signal Classification and Analysis","score":0.9967,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Interactive Evolutionary Music Systems and Instruments","score":0.9965,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks in Image Processing","score":0.9843,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.55755997},{"id":"https://openalex.org/keywords/sound-synthesis","display_name":"Sound Synthesis","score":0.537546},{"id":"https://openalex.org/keywords/environmental-sound-recognition","display_name":"Environmental Sound Recognition","score":0.530324},{"id":"https://openalex.org/keywords/representation-learning","display_name":"Representation Learning","score":0.529601},{"id":"https://openalex.org/keywords/audio-event-detection","display_name":"Audio Event Detection","score":0.524504},{"id":"https://openalex.org/keywords/generative-adversarial-networks","display_name":"Generative Adversarial Networks","score":0.51668}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8508062},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6205464},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.55755997},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.455732},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.45482802},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.41644526},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37229282},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2021.3079897","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"62072328"},{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"61672375"}],"datasets":[],"versions":[],"referenced_works_count":54,"referenced_works":["https://openalex.org/W1163620831","https://openalex.org/W1686810756","https://openalex.org/W1969746716","https://openalex.org/W1977985044","https://openalex.org/W1999802312","https://openalex.org/W2004990376","https://openalex.org/W2008142581","https://openalex.org/W2076169683","https://openalex.org/W2088529060","https://openalex.org/W2099332139","https://openalex.org/W2099471712","https://openalex.org/W2111919806","https://openalex.org/W2115388399","https://openalex.org/W2125389028","https://openalex.org/W2141645628","https://openalex.org/W2151617679","https://openalex.org/W2169930732","https://openalex.org/W2184902314","https://openalex.org/W2292998146","https://openalex.org/W2293174243","https://openalex.org/W2316743980","https://openalex.org/W2405756170","https://openalex.org/W2468162309","https://openalex.org/W2550225908","https://openalex.org/W2564453797","https://openalex.org/W2570915410","https://openalex.org/W2584032004","https://openalex.org/W2593414223","https://openalex.org/W2703895418","https://openalex.org/W2793476612","https://openalex.org/W2810177721","https://openalex.org/W2922098633","https://openalex.org/W2951523806","https://openalex.org/W2959861911","https://openalex.org/W2962960500","https://openalex.org/W2963066677","https://openalex.org/W2963073614","https://openalex.org/W2963315052","https://openalex.org/W2963341071","https://openalex.org/W2963373786","https://openalex.org/W2963807156","https://openalex.org/W2964121744","https://openalex.org/W2964121818","https://openalex.org/W2964345931","https://openalex.org/W2968379763","https://openalex.org/W2998341422","https://openalex.org/W3003396222","https://openalex.org/W3034619766","https://openalex.org/W3046202605","https://openalex.org/W3123318516","https://openalex.org/W4206647626","https://openalex.org/W4240777687","https://openalex.org/W4289665794","https://openalex.org/W4297817572"],"related_works":["https://openalex.org/W4375868962","https://openalex.org/W3179495260","https://openalex.org/W3127543252","https://openalex.org/W2897924318","https://openalex.org/W2530685530","https://openalex.org/W2138997758","https://openalex.org/W2088854863","https://openalex.org/W2065606036","https://openalex.org/W2011227383","https://openalex.org/W1976719989"],"abstract_inverted_index":{"Automatically":[0],"synthesizing":[1],"sounds":[2],"for":[3,170],"different":[4],"visual":[5],"contents":[6],"poses":[7],"a":[8,13,33,44,59,167],"challenge":[9],"and":[10,86,103,131,176],"there":[11],"is":[12,101],"strong":[14],"need":[15],"to":[16,50,93],"facilitate":[17],"the":[18,52,55,68,84,98,127,139,142],"direct":[19],"creation":[20],"of":[21,54,71,141],"realistic":[22],"sounds.":[23],"Different":[24],"from":[25],"previous":[26,89],"works,":[27],"in":[28,134],"this":[29],"paper,":[30],"we":[31,146],"propose":[32],"novel":[34],"deep":[35],"learning":[36],"based":[37],"approach,":[38],"which":[39,80,164],"formulates":[40],"sound":[41,63,133,162,174],"simulation":[42],"as":[43,173],"regression":[45],"problem.":[46],"This":[47],"allows":[48],"us":[49],"circumvent":[51],"complexity":[53],"acoustic":[56],"theory":[57],"by":[58,105],"novel,":[60],"general-purpose":[61],"neural":[62,143],"synthesis":[64],"(V2RA)":[65],"network.":[66],"Moreover,":[67],"end-to-end":[69],"architecture":[70,113],"V2RA":[72,99,157],"ensures":[73],"full":[74],"training":[75],"without":[76],"any":[77],"extra":[78],"inputs,":[79],"thereby":[81],"greatly":[82],"improves":[83],"scalability":[85],"reusability":[87],"over":[88],"works.":[90],"In":[91],"contrast":[92],"conventional":[94],"visual-to-audio":[95],"generation":[96],"methods,":[97],"problem":[100],"established":[102],"solved":[104],"generative":[106],"adversarial":[107],"networks":[108],"(GANs).":[109],"Furthermore,":[110],"our":[111,156],"network":[112,144,158],"can":[114,159],"directly":[115],"predict":[116],"synchronized":[117],"raw":[118],"audio":[119,128],"signals":[120],"(unlike":[121],"most":[122],"existing":[123],"approaches":[124],"that":[125,155],"handle":[126],"through":[129],"spectrograms)":[130],"generate":[132],"real":[135],"time.":[136],"To":[137],"evaluate":[138],"performance":[140],"generator,":[145],"specifically":[147],"introduce":[148],"two":[149],"quantitative":[150],"scores.":[151],"Various":[152],"experiments":[153],"demonstrate":[154],"produce":[160],"compelling":[161],"results,":[163],"thus":[165],"provides":[166],"viable":[168],"solution":[169],"applications":[171],"such":[172],"design":[175],"dubbing.":[177]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3163658555","counts_by_year":[{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":7}],"updated_date":"2024-11-23T10:37:35.093972","created_date":"2021-05-24"}