{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,7,4]],"date-time":"2024-07-04T12:09:29Z","timestamp":1720094969173},"reference-count":41,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2023,8,30]],"date-time":"2023-08-30T00:00:00Z","timestamp":1693353600000},"content-version":"am","delay-in-days":333,"URL":"http:\/\/www.elsevier.com\/open-access\/userlicense\/1.0\/"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Speech Communication"],"published-print":{"date-parts":[[2022,10]]},"DOI":"10.1016\/j.specom.2022.08.005","type":"journal-article","created":{"date-parts":[[2022,8,30]],"date-time":"2022-08-30T15:44:28Z","timestamp":1661874268000},"page":"57-66","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Recursive Feature Diversity Network for audio super-resolution"],"prefix":"10.1016","volume":"144","author":[{"given":"Bo","family":"Jiang","sequence":"first","affiliation":[]},{"given":"Mixiao","family":"Hou","sequence":"additional","affiliation":[]},{"given":"Jiahuan","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Yao","family":"Lu","sequence":"additional","affiliation":[]},{"given":"David","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Guangming","family":"Lu","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.specom.2022.08.005_b1","doi-asserted-by":"crossref","unstructured":"Agustsson, E., Timofte, R., 2017. Ntire 2017 challenge on single image super-resolution: Dataset and study. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops. pp. 126\u2013135.","DOI":"10.1109\/CVPRW.2017.150"},{"key":"10.1016\/j.specom.2022.08.005_b2","series-title":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"5429","article-title":"Efficient super-wide bandwidth extension using linear prediction based analysis-synthesis","author":"Bachhav","year":"2018"},{"key":"10.1016\/j.specom.2022.08.005_b3","series-title":"INTERSPEECH","first-page":"1505","article-title":"Bandwidth expansion of narrowband speech using non-negative matrix factorization","author":"Bansal","year":"2005"},{"key":"10.1016\/j.specom.2022.08.005_b4","series-title":"NeurIPS","article-title":"Temporal FiLM: Capturing long-range sequence dependencies with feature-wise modulations","author":"Birnbaum","year":"2019"},{"key":"10.1016\/j.specom.2022.08.005_b5","series-title":"2009 16th IEEE International Conference on Image Processing (ICIP)","first-page":"349","article-title":"Nonlocal back-projection for adaptive image enlargement","author":"Dong","year":"2009"},{"key":"10.1016\/j.specom.2022.08.005_b6","series-title":"Proceedings of the 1st IEEE Benelux Workshop on Model Based Processing and Coding of Audio (MPCA\u201902)","article-title":"Bandwidth extension of audio signals by spectral band replication","author":"Ekstrand","year":"2002"},{"key":"10.1016\/j.specom.2022.08.005_b7","series-title":"IEEE Workshop on Speech Coding Proceedings. Model, Coders, and Error Criteria","first-page":"174","article-title":"A new technique for wideband enhancement of coded narrowband speech","author":"Epps","year":"1999"},{"key":"10.1016\/j.specom.2022.08.005_b8","series-title":"IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"3717","article-title":"Speech super resolution generative adversarial network","author":"Eskimez","year":"2019"},{"key":"10.1016\/j.specom.2022.08.005_b9","series-title":"ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"3717","article-title":"Speech super resolution generative adversarial network","author":"Eskimez","year":"2019"},{"key":"10.1016\/j.specom.2022.08.005_b10","series-title":"ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"606","article-title":"Learning bandwidth expansion using perceptually-motivated loss","author":"Feng","year":"2019"},{"issue":"5","key":"10.1016\/j.specom.2022.08.005_b11","doi-asserted-by":"crossref","first-page":"380","DOI":"10.1109\/TASSP.1976.1162849","article-title":"Distance measures for speech processing","volume":"24","author":"Gray","year":"1976","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"10.1016\/j.specom.2022.08.005_b12","doi-asserted-by":"crossref","unstructured":"Guo, Y., Chen, J., Wang, J., Chen, Q., Cao, J., Deng, Z., Xu, Y., Tan, M., 2020. Closed-loop Matters: Dual Regression Networks for Single Image Super-Resolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 5407\u20135416.","DOI":"10.1109\/CVPR42600.2020.00545"},{"key":"10.1016\/j.specom.2022.08.005_b13","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s11760-016-0880-y","article-title":"First-order derivative-based super-resolution","volume":"11","author":"Haris","year":"2017","journal-title":"Signal, Image Video Process."},{"issue":"3","key":"10.1016\/j.specom.2022.08.005_b14","first-page":"231","article-title":"Improving resolution by image registration","volume":"53","author":"Irani","year":"1991","journal-title":"CVGIP: Graph. Models Image Process."},{"key":"10.1016\/j.specom.2022.08.005_b15","series-title":"IEEE International Conference on Acoustics, Speech, and Signal Processing","first-page":"I","article-title":"Artificial bandwidth extension of speech signals using MMSE estimation based on a hidden Markov model","volume":"1","author":"Jax","year":"2003"},{"key":"10.1016\/j.specom.2022.08.005_b16","series-title":"2003 IEEE International Conference on Acoustics, Speech, and Signal Processing, 2003. Proceedings.(ICASSP\u201903)","first-page":"I","article-title":"Artificial bandwidth extension of speech signals using MMSE estimation based on a hidden Markov model","volume":"1","author":"Jax","year":"2003"},{"key":"10.1016\/j.specom.2022.08.005_b17","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2022.108317","article-title":"Real noise image adjustment networks for saliency-aware stylistic color retouch","volume":"242","author":"Jiang","year":"2022","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.specom.2022.08.005_b18","doi-asserted-by":"crossref","first-page":"5124","DOI":"10.1109\/TCSVT.2022.3149518","article-title":"Deep image denoising with adaptive priors","volume":"32","author":"Jiang","year":"2022","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.specom.2022.08.005_b19","doi-asserted-by":"crossref","DOI":"10.1109\/TIM.2022.3189739","article-title":"Multi-level noise contrastive network for few-shot image denoising","author":"Jiang","year":"2022","journal-title":"IEEE Trans. Instrum. Meas."},{"key":"10.1016\/j.specom.2022.08.005_b20","series-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014"},{"key":"10.1016\/j.specom.2022.08.005_b21","series-title":"Audio super resolution using neural networks","author":"Kuleshov","year":"2017"},{"key":"10.1016\/j.specom.2022.08.005_b22","series-title":"Audio Bandwidth Extension: Application of Psychoacoustics, Signal Processing and Loudspeaker Design","author":"Larsen","year":"2005"},{"key":"10.1016\/j.specom.2022.08.005_b23","series-title":"2019 3rd International Conference on Electronics, Communication and Aerospace Technology (ICECA)","first-page":"186","article-title":"Image-to-image translation using generative adversarial network","author":"Lata","year":"2019"},{"key":"10.1016\/j.specom.2022.08.005_b24","series-title":"INTERSPEECH","first-page":"3416","article-title":"Speech audio super-resolution for speech recognition","author":"Li","year":"2019"},{"key":"10.1016\/j.specom.2022.08.005_b25","series-title":"16th Annual Conference of the International Speech Communication Association","first-page":"2578","article-title":"DNN-based speech bandwidth expansion and its application to adding high-frequency missing features for automatic speech recognition of narrowband speech","author":"Li","year":"2015"},{"key":"10.1016\/j.specom.2022.08.005_b26","series-title":"2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"4395","article-title":"A deep neural network approach to speech bandwidth expansion","author":"Li","year":"2015"},{"key":"10.1016\/j.specom.2022.08.005_b27","doi-asserted-by":"crossref","unstructured":"Li, Z., Yang, J., Liu, Z., Yang, X., Jeon, G., Wu, W., 2019b. Feedback network for image super-resolution. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 3867\u20133876.","DOI":"10.1109\/CVPR.2019.00399"},{"key":"10.1016\/j.specom.2022.08.005_b28","series-title":"IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"646","article-title":"Time-frequency networks for audio super-resolution","author":"Lim","year":"2018"},{"key":"10.1016\/j.specom.2022.08.005_b29","doi-asserted-by":"crossref","unstructured":"Mei, Y., Fan, Y., Zhou, Y., Huang, L., Huang, T.S., Shi, H., 2020. Image super-resolution with cross-scale non-local attention and exhaustive self-exemplars mining. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 5690\u20135699.","DOI":"10.1109\/CVPR42600.2020.00573"},{"issue":"8","key":"10.1016\/j.specom.2022.08.005_b30","doi-asserted-by":"crossref","first-page":"1006","DOI":"10.1109\/LSP.2014.2379648","article-title":"Can we automatically transform speech recorded on common consumer devices in real-world environments into professional production quality speech?\u2014a dataset, insights, and challenges","volume":"22","author":"Mysore","year":"2014","journal-title":"IEEE Signal Process. Lett."},{"key":"10.1016\/j.specom.2022.08.005_b31","series-title":"IEEE International Conference on Acoustics, Speech, and Signal Processing","first-page":"1843","article-title":"Narrowband to wideband conversion of speech using GMM based transformation","author":"Park","year":"2000"},{"key":"10.1016\/j.specom.2022.08.005_b32","series-title":"Automatic differentiation in pytorch","author":"Paszke","year":"2017"},{"key":"10.1016\/j.specom.2022.08.005_b33","series-title":"Cardinal Spline Interpolation","author":"Schoenberg","year":"1973"},{"key":"10.1016\/j.specom.2022.08.005_b34","series-title":"2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"6087","article-title":"A maximum a posterior-based reconstruction approach to speech bandwidth expansion in noise","author":"Seo","year":"2014"},{"key":"10.1016\/j.specom.2022.08.005_b35","series-title":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"4779","article-title":"Natural tts synthesis by conditioning wavenet on mel spectrogram predictions","author":"Shen","year":"2018"},{"key":"10.1016\/j.specom.2022.08.005_b36","series-title":"ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"696","article-title":"Bandwidth extension is all you need","author":"Su","year":"2021"},{"key":"10.1016\/j.specom.2022.08.005_b37","doi-asserted-by":"crossref","first-page":"8916","DOI":"10.1109\/TIP.2020.3021789","article-title":"Unified generative adversarial networks for controllable image-to-image translation","volume":"29","author":"Tang","year":"2020","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.specom.2022.08.005_b38","series-title":"Superseded-CSTR VCTK corpus: English multi-speaker corpus for CSTR voice cloning toolkit","author":"Veaux","year":"2017"},{"key":"10.1016\/j.specom.2022.08.005_b39","doi-asserted-by":"crossref","unstructured":"Zhang, K., Gool, L.V., Timofte, R., 2020. Deep unfolding network for image super-resolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 3217\u20133226.","DOI":"10.1109\/CVPR42600.2020.00328"},{"key":"10.1016\/j.specom.2022.08.005_b40","doi-asserted-by":"crossref","first-page":"200","DOI":"10.1016\/j.neucom.2016.11.049","article-title":"Iterative projection reconstruction for fast and efficient image upsampling","volume":"226","author":"Zhao","year":"2017","journal-title":"Neurocomputing"},{"key":"10.1016\/j.specom.2022.08.005_b41","series-title":"2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"12803","article-title":"Deep adversarial decomposition: A unified framework for separating superimposed images","author":"Zou","year":"2020"}],"container-title":["Speech Communication"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167639322001091?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167639322001091?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,4,1]],"date-time":"2024-04-01T10:46:56Z","timestamp":1711968416000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0167639322001091"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10]]},"references-count":41,"alternative-id":["S0167639322001091"],"URL":"https:\/\/doi.org\/10.1016\/j.specom.2022.08.005","relation":{},"ISSN":["0167-6393"],"issn-type":[{"value":"0167-6393","type":"print"}],"subject":[],"published":{"date-parts":[[2022,10]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Recursive Feature Diversity Network for audio super-resolution","name":"articletitle","label":"Article Title"},{"value":"Speech Communication","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.specom.2022.08.005","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2022 Published by Elsevier B.V.","name":"copyright","label":"Copyright"}]}}