{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T14:06:14Z","timestamp":1730297174075,"version":"3.28.0"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T00:00:00Z","timestamp":1673222400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T00:00:00Z","timestamp":1673222400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,1,9]]},"DOI":"10.1109\/slt54892.2023.10022796","type":"proceedings-article","created":{"date-parts":[[2023,1,27]],"date-time":"2023-01-27T18:54:03Z","timestamp":1674845643000},"page":"943-948","source":"Crossref","is-referenced-by-count":1,"title":["Generative Models for Improved Naturalness, Intelligibility, and Voicing of Whispered Speech"],"prefix":"10.1109","author":[{"given":"Dominik","family":"Wagner","sequence":"first","affiliation":[{"name":"Technische Hochschule Nürnberg Georg Simon Ohm,Germany"}]},{"given":"Sebastian P.","family":"Bayerl","sequence":"additional","affiliation":[{"name":"Technische Hochschule Nürnberg Georg Simon Ohm,Germany"}]},{"given":"Hector A. Cordourier","family":"Maruri","sequence":"additional","affiliation":[{"name":"Intel Labs"}]},{"given":"Tobias","family":"Bocklet","sequence":"additional","affiliation":[{"name":"Technische Hochschule Nürnberg Georg Simon Ohm,Germany"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1017\/cbo9781139166621"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1017\/cbo9780511627743"},{"volume-title":"Tracheoesophageal Speech. A Multidimensional Assessment of Voice Quality","year":"2001","author":"van As","key":"ref3"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2005-611"},{"key":"ref5","article-title":"End-to-end whisper to natural speech conversion using modified transformer network","volume":"abs\/2004.09347","author":"Niranjan","year":"2021","journal-title":"ArXiv"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1487"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2940700"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.5555\/2969033.2969125"},{"key":"ref9","article-title":"Attention-guided generative adversarial network for whisper to normal speech conversion","volume":"vol. abs\/2111.01342","author":"Gao","year":"2021","journal-title":"ArXiv"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/IberSPEECH.2018-25"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1428"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.23919\/EUSIPCO.2019.8902961"},{"key":"ref13","first-page":"1857","article-title":"Learning to discover cross-domain relations with generative adversarial networks","volume-title":"Proceedings of the 34th International Conference on Machine Learning","volume":"70","author":"Kim","year":"2017"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2013.2283471"},{"key":"ref16","article-title":"Melgan: Generative adversarial networks for conditional waveform synthesis","volume":"32","author":"Kumar","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref17","article-title":"Neural discrete representation learning","volume":"30","author":"van den Oord","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2938863"},{"volume-title":"Computational differences between whispered and non-whispered speech","year":"2010","author":"Lim","key":"ref19"},{"key":"ref20","article-title":"Auto-encoding variational bayes","volume-title":"2nd International Conference on Learning Representations","author":"Kingma","year":"2014"},{"key":"ref21","article-title":"Estimating or propagating gradients through stochastic neurons for conditional computation","volume":"abs\/1308.3432","author":"Bengio","year":"2013","journal-title":"ArXiv"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683143"},{"key":"ref23","first-page":"8000","article-title":"The challenge of realistic music generation: Modelling raw audio at scale","volume-title":"Proceedings of the 32nd International Conference on Neural Information Processing Systems","author":"Dieleman","year":"2018"},{"key":"ref24","article-title":"Theory and experiments on vector quantized autoencoders","volume":"abs\/1805.1106","author":"Roy","year":"2018","journal-title":"ArXiv"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN48605.2020.9207145"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2010.5495701"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21437\/Eurospeech.2001-111"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1196"},{"key":"ref29","first-page":"12449","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume":"33","author":"Baevski","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-703"},{"key":"ref31","first-page":"1780","article-title":"KSoF: The Kassel State of Fluency Dataset \u2013 A Therapy Centered Dataset of Stuttering","volume-title":"Proceedings of the Language Resources and Evaluation Conference","author":"Bayerl","year":"2022"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvoice.2023.01.012"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.35111\/17gk-bn40"},{"key":"ref34","first-page":"2579","article-title":"Visualizing data using t-sne","volume":"9","author":"van der Maaten","year":"2008","journal-title":"Journal of Machine Learning Research"},{"key":"ref35","first-page":"1558","article-title":"Autoencoding beyond pixels using a learned similarity metric","volume-title":"Proceedings of the 33rd International Conference on International Conference on Machine Learning","volume":"48","author":"Larsen","year":"2016"}],"event":{"name":"2022 IEEE Spoken Language Technology Workshop (SLT)","start":{"date-parts":[[2023,1,9]]},"location":"Doha, Qatar","end":{"date-parts":[[2023,1,12]]}},"container-title":["2022 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10022052\/10022330\/10022796.pdf?arnumber=10022796","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,13]],"date-time":"2024-02-13T08:07:37Z","timestamp":1707811657000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10022796\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1,9]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/slt54892.2023.10022796","relation":{},"subject":[],"published":{"date-parts":[[2023,1,9]]}}}