{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T07:40:16Z","timestamp":1733211616911,"version":"3.30.0"},"publisher-location":"Cham","reference-count":22,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031781711"},{"type":"electronic","value":"9783031781728"}],"license":[{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-78172-8_26","type":"book-chapter","created":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T09:48:32Z","timestamp":1733132912000},"page":"397-408","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["SWave: Improving Vocoder Efficiency by\u00a0Straightening the\u00a0Waveform Generation Path"],"prefix":"10.1007","author":[{"given":"Pan","family":"Liu","sequence":"first","affiliation":[]},{"given":"Jianping","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Xiaohua","family":"Tian","sequence":"additional","affiliation":[]},{"given":"Zhouhan","family":"Lin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,3]]},"reference":[{"key":"26_CR1","unstructured":"Chen, N., Zhang, Y., Zen, H., Weiss, R.J., Norouzi, M., Chan, W.: WaveGrad: estimating gradients for waveform generation. arXiv preprint arXiv:2009.00713 (2020)"},{"key":"26_CR2","doi-asserted-by":"crossref","unstructured":"Chen, Z., et al.: InferGrad: improving diffusion models for vocoder by considering inference in training. In: ICASSP 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 8432\u20138436. IEEE (2022)","DOI":"10.1109\/ICASSP43922.2022.9746690"},{"key":"26_CR3","doi-asserted-by":"crossref","unstructured":"Guan, W., Su, Q., Zhou, H., Miao, S., Xie, X., Li, L., Hong, Q.: ReFlow-TTS: a rectified flow model for high-fidelity text-to-speech. arXiv preprint arXiv:2309.17056 (2023)","DOI":"10.1109\/ICASSP48485.2024.10447822"},{"key":"26_CR4","doi-asserted-by":"crossref","unstructured":"Guo, Y., Du, C., Ma, Z., Chen, X., Yu, K.: VoiceFlow: efficient text-to-speech with rectified flow matching. arXiv preprint arXiv:2309.05027 (2023)","DOI":"10.1109\/ICASSP48485.2024.10445948"},{"key":"26_CR5","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"26_CR6","doi-asserted-by":"crossref","unstructured":"Huang, R., Lam, M.W., Wang, J., Su, D., Yu, D., Ren, Y., Zhao, Z.: FastDiff: a fast conditional diffusion model for high-quality speech synthesis. arXiv preprint arXiv:2204.09934 (2022)","DOI":"10.24963\/ijcai.2022\/577"},{"key":"26_CR7","unstructured":"Ito, K., Johnson, L.: The LJ speech dataset (2017)"},{"key":"26_CR8","unstructured":"Kalchbrenner, N., et al.: Efficient neural audio synthesis. In: International Conference on Machine Learning, pp. 2410\u20132419. PMLR (2018)"},{"key":"26_CR9","unstructured":"Kingma, D.P., Dhariwal, P.: Glow: generative flow with invertible 1x1 convolutions. Adv. Neural Inf. Process. Syst. 31 (2018)"},{"key":"26_CR10","first-page":"17022","volume":"33","author":"J Kong","year":"2020","unstructured":"Kong, J., Kim, J., Bae, J.: HiFi-GAN: generative adversarial networks for efficient and high fidelity speech synthesis. Adv. Neural. Inf. Process. Syst. 33, 17022\u201317033 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"26_CR11","unstructured":"Kong, Z., Ping, W., Huang, J., Zhao, K., Catanzaro, B.: DiffWave: a versatile diffusion model for audio synthesis. arXiv preprint arXiv:2009.09761 (2020)"},{"key":"26_CR12","unstructured":"Kumar, K., et al.: MelGAN: generative adversarial networks for conditional waveform synthesis. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"26_CR13","unstructured":"Liu, X., Gong, C., Liu, Q.: Flow straight and fast: learning to generate and transfer data with rectified flow. arXiv preprint arXiv:2209.03003 (2022)"},{"key":"26_CR14","unstructured":"Liu, X., Zhang, X., Ma, J., Peng, J., Liu, Q.: InstaFlow: one step is enough for high-quality diffusion-based text-to-image generation. arXiv preprint arXiv:2309.06380 (2023)"},{"key":"26_CR15","unstructured":"Mehri, S., Kumar, K., Gulrajani, I., Kumar, R., Jain, S., Sotelo, J., Courville, A., Bengio, Y.: SampleRNN: an unconditional end-to-end neural audio generation model. arXiv preprint arXiv:1612.07837 (2016)"},{"key":"26_CR16","unstructured":"Oord, A.v.d., et al.: WaveNet: a generative model for raw audio. arXiv preprint arXiv:1609.03499 (2016)"},{"key":"26_CR17","unstructured":"Peng, K., Ping, W., Song, Z., Zhao, K.: Non-autoregressive neural text-to-speech. In: International Conference on Machine Learning, pp. 7586\u20137598. PMLR (2020)"},{"key":"26_CR18","unstructured":"Popov, V., Vovk, I., Gogoryan, V., Sadekova, T., Kudinov, M.: Grad-TTS: a diffusion probabilistic model for text-to-speech. In: International Conference on Machine Learning, pp. 8599\u20138608. PMLR (2021)"},{"key":"26_CR19","doi-asserted-by":"crossref","unstructured":"Prenger, R., Valle, R., Catanzaro, B.: WaveGlow: a flow-based generative network for speech synthesis. In: ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 3617\u20133621. IEEE (2019)","DOI":"10.1109\/ICASSP.2019.8683143"},{"key":"26_CR20","unstructured":"Rezende, D., Mohamed, S.: Variational inference with normalizing flows. In: International Conference on Machine Learning, pp. 1530\u20131538. PMLR (2015)"},{"key":"26_CR21","unstructured":"Song, Y., Dhariwal, P., Chen, M., Sutskever, I.: Consistency models. arXiv preprint arXiv:2303.01469 (2023)"},{"key":"26_CR22","doi-asserted-by":"crossref","unstructured":"Yamamoto, R., Song, E., Kim, J.M.: Parallel waveGAN: a fast waveform generation model based on generative adversarial networks with multi-resolution spectrogram. In: ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6199\u20136203. IEEE (2020)","DOI":"10.1109\/ICASSP40776.2020.9053795"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-78172-8_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T10:08:37Z","timestamp":1733134117000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-78172-8_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,3]]},"ISBN":["9783031781711","9783031781728"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-78172-8_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,3]]},"assertion":[{"value":"3 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kolkata","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icpr2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icpr2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}