{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,13]],"date-time":"2024-09-13T19:14:21Z","timestamp":1726254861230},"publisher-location":"Cham","reference-count":41,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031477232"},{"type":"electronic","value":"9783031477249"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-47724-9_13","type":"book-chapter","created":{"date-parts":[[2024,4,18]],"date-time":"2024-04-18T20:29:08Z","timestamp":1713472148000},"page":"190-202","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["ASR Bundestag: A Large-Scale Political Debate Dataset in German"],"prefix":"10.1007","author":[{"given":"Johannes","family":"Wirth","sequence":"first","affiliation":[]},{"given":"Ren\u00e9","family":"Peinl","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,4,19]]},"reference":[{"key":"13_CR1","unstructured":"Kaplan, J. et al.: Scaling laws for neural language models (2020). arXiv:2001.08361"},{"key":"13_CR2","unstructured":"Baevski, A., Zhou, H., Mohamed, A., Auli, M.: wav2vec 2.0: a framework for self-supervised learning of speech representations (2020). arXiv:2006.11477"},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Wirth, J., Peinl, R.: Automatic speech recognition in German: a detailed error analysis. In: 2022 IEEE International Conference on Omni-layer Intelligent Systems (COINS), IEEE, 2022, pp. 1\u20138 (2022)","DOI":"10.1109\/COINS54846.2022.9854978"},{"key":"13_CR4","doi-asserted-by":"publisher","first-page":"3451","DOI":"10.1109\/TASLP.2021.3122291","volume":"29","author":"W-N Hsu","year":"2021","unstructured":"Hsu, W.-N., Bolte, B., Tsai, Y.-H.H., Lakhotia, K., Salakhutdinov, R., Mohamed, A.: Hubert: self-supervised speech representation learning by masked prediction of hidden units. IEEE\/ACM Trans. Audio Speech Lang. Process. 29, 3451\u20133460 (2021)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"13_CR5","unstructured":"Chen, S., et al.: Wavlm: large-scale self-supervised pre-training for full stack speech processing (2021). arXiv:2110.13900"},{"key":"13_CR6","unstructured":"Chan, W., Park, D., Lee, C., Zhang, Y., Le, Q., Norouzi, M.: Speechstew: simply mix all available speech recognition data to train one large neural network (2021). arXiv:2104.02133"},{"key":"13_CR7","doi-asserted-by":"crossref","unstructured":"O\u2019Neill, P.K., et al.: SPGISpeech: 5,000 hours of transcribed financial audio for fully formatted end-to-end speech recognition (2021). arXiv:2104.02014","DOI":"10.21437\/Interspeech.2021-1860"},{"key":"13_CR8","doi-asserted-by":"crossref","unstructured":"Chen, G., et al.: Gigaspeech: an evolving, multi-domain asr corpus with 10,000 hours of transcribed audio (2021). arXiv:2106.06909","DOI":"10.21437\/Interspeech.2021-1965"},{"key":"13_CR9","unstructured":"Solak, I.: The M-AILABS speech dataset (2019). https:\/\/www.caito.de\/2019\/01\/the-m-ailabs-speech-dataset\/"},{"key":"13_CR10","unstructured":"Ardila, R., et al.: Common voice: a massively-multilingual speech corpus. In: Proceedings of the 12th Language Resources and Evaluation Conference, pp. 4218\u20134222 (2020)"},{"key":"13_CR11","doi-asserted-by":"crossref","unstructured":"Puchtler, P., Wirth, J., Peinl, R.: HUI-audio-corpus-German: a high quality TTS dataset. In: 44th German Conference on Artificial Intelligence (KI2021), Berlin, Germany (2021)","DOI":"10.1007\/978-3-030-87626-5_15"},{"key":"13_CR12","unstructured":"M\u00fcller, T.: Thorsten Open German Voice Dataset (2021). https:\/\/github.com\/thorstenMueller\/deep-learning-german-tts. Accessed 26 Mar 2021"},{"key":"13_CR13","unstructured":"Bavarian Archive for Speech Signals, BAS Alcohol Language Corpus (2016). http:\/\/hdl.handle.net\/11022\/1009-0000-0001-88E5-3, http:\/\/hdl.handle.net\/11022\/1009-0000-0001-88E5-3"},{"key":"13_CR14","unstructured":"Mapelli, V.: Strange Corpus 10 - SC10 (\u2019Accents II\u2019) (ELRA-S0114), ELRA (via CLARIN VLO), ISLRN 024\u2013991\u2013750\u2013952\u20133 (2004)"},{"key":"13_CR15","unstructured":"MacLean, K.: Voxforge. http:\/\/www.voxforge.org"},{"issue":"2","key":"13_CR16","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s10579-017-9410-y","volume":"53","author":"T Baumann","year":"2019","unstructured":"Baumann, T., K\u00f6hn, A., Hennig, F.: The spoken wikipedia corpus collection: harvesting, alignment and an application to hyperlistening. Lang. Resour. Eval. 53(2), 303\u2013329 (2019)","journal-title":"Lang. Resour. Eval."},{"key":"13_CR17","doi-asserted-by":"crossref","unstructured":"Hernandez, F., Nguyen, V., Ghannay, S., Tomashenko, N., Esteve, Y.: TED-LIUM 3: twice as much data and corpus repartition for experiments on speaker adaptation. In: International Conferences on Speech and Computer, pp. 198\u2013208. Springer (2018)","DOI":"10.1007\/978-3-319-99579-3_21"},{"key":"13_CR18","doi-asserted-by":"crossref","unstructured":"Wang, C., et al.: VoxPopuli: a large-scale multilingual speech corpus for representation learning, semi-supervised learning and interpretation. In: 11th International Joint Conferences on Natural Language Processing, Online: Association for Computational Linguistics, August 2021, pp. 993\u20131003","DOI":"10.18653\/v1\/2021.acl-long.80"},{"key":"13_CR19","unstructured":"Draxler, C., Schiel, F.: Three new corpora at the Bavarian archive for speech signals-and a first step towards distributed web-based recording (2002)"},{"key":"13_CR20","unstructured":"Pan, P.: Tackle how to identify and understand high qualitative AI data solutions will improve your model performance. In: Interspeech 2022, Incheon, Korea (2022). https:\/\/www.interspeech2022.org\/program\/industrytalk.php"},{"key":"13_CR21","doi-asserted-by":"crossref","unstructured":"Radeck, S.-A., et al.: Open source german distant speech recognition: corpus and acoustic model. In: International Conference on Text, Speech, and Dialogue, pp. 480\u2013488. Springer (2015)","DOI":"10.1007\/978-3-319-24033-6_54"},{"key":"13_CR22","unstructured":"BMBF and Projekttr\u00e4ger DLR, VERBMOBIL II - VM CD21.1 - VM21.1 (ELRA-S0034\u201330). European Language Resources (ELRA), 1.0, ISLRN 837\u2013421\u2013490- 699\u20133. 2004"},{"key":"13_CR23","first-page":"2757","volume":"2020","author":"V Pratap","year":"2020","unstructured":"Pratap, V., Xu, Q., Sriram, A., Synnaeve, G., Collobert, R.: MLS: a large-scale multilingual dataset for speech research. Interspeech 2020, 2757\u20132761 (2020)","journal-title":"Interspeech"},{"key":"13_CR24","unstructured":"LibriVox | free public domain audiobooks. https:\/\/librivox.org\/ Accessed 15 Dec 2022"},{"key":"13_CR25","unstructured":"Bavarian Archive for Speech Signals, \u201cBAS Sl100,\u201d (1995) http:\/\/hdl.handle.net\/11022\/1009-0000-0007-E9CF-A. Accessed 28 Mar 2022"},{"key":"13_CR26","doi-asserted-by":"crossref","unstructured":"Garnerin, M., Rossato, S., Besacier, L.: Investigating the impact of gender representation in asr training data: a case study on librispeech. In: 3rd Workshop on Gender Bias in Natural Language Processing, Association for Computational Linguistics, 2021, pp. 86\u201392","DOI":"10.18653\/v1\/2021.gebnlp-1.10"},{"key":"13_CR27","doi-asserted-by":"crossref","unstructured":"Chung, Y.-A., et al.: W2v-bert: combining contrastive learning and masked language modeling for self-supervised speech pre-training (2021). arXiv:2108.06209","DOI":"10.1109\/ASRU51503.2021.9688253"},{"key":"13_CR28","unstructured":"Deutscher Bundestag - Mediathek (2022). https:\/\/www.bundestag.de\/mediathek. Accessed 15 Dec 2022"},{"key":"13_CR29","unstructured":"Nutzungsbedingungen f\u00fcr das Audio-und Videomaterial des Parlamentsfernsehens. https:\/\/www.bundestag.de\/mediathek. Accessed 15 Dec 2022"},{"key":"13_CR30","unstructured":"Kuchaiev, O., et al.: NeMo: a toolkit for building AI applications using Neural Modules. http:\/\/arxiv.org\/abs\/1909.09577. Accessed 13 Sept 2019, 13 Dec 2022"},{"key":"13_CR31","unstructured":"Bredin, H., et al.: pyannote.audio: neural building blocks for speaker diarization. http:\/\/arxiv.org\/abs\/1911.01255. Accessed 04 Nov 2019, 07 Dec 2022"},{"key":"13_CR32","doi-asserted-by":"publisher","unstructured":"K\u00fcrzinger, L., Winkelbauer, D., Li, L., Watzel, T., Rigoll, G.: CTC-Segmentation of Large Corpora for German End-to-End Speech Recognition [EESS], vol. 12335, pp. 267\u2013278 (2020). arXiv:2007.09127, https:\/\/doi.org\/10.1007\/978-3-030-60276-5_27","DOI":"10.1007\/978-3-030-60276-5_27"},{"key":"13_CR33","doi-asserted-by":"crossref","unstructured":"You, Z., Feng, S., Su, D., Yu, D.: Speechmoe: scaling to large acoustic models with dynamic routing mixture of experts (2021). arXiv:2105.03036","DOI":"10.21437\/Interspeech.2021-478"},{"key":"13_CR34","doi-asserted-by":"crossref","unstructured":"You, Z., Feng, S., Su, D., Yu, D.: Speechmoe2: mixture-of-experts model with improved routing. In: ICASSP 2022\u20132022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, 2022, pp. 7217\u20137221","DOI":"10.1109\/ICASSP43922.2022.9747065"},{"key":"13_CR35","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Sun, S., and Ma, L., \u201cTiny transducer: A highly-efficient speech recognition model on edge devices,\u201d in ICASSP 2021\u20132021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, 2021, pp. 6024\u20136028","DOI":"10.1109\/ICASSP39728.2021.9413854"},{"key":"13_CR36","doi-asserted-by":"crossref","unstructured":"Kim, S., et al.: Q-ASR: integer-only zero-shot quantization for efficient speech recognition (2021). arXiv:2103.16827","DOI":"10.1109\/ICASSP43922.2022.9747552"},{"key":"13_CR37","unstructured":"Ravanelli, M., et al.: SpeechBrain: a general-purpose speech toolkit (2021)"},{"key":"13_CR38","unstructured":"Bakhturina, E., Lavrukhin, V., Ginsburg, B.: NeMo toolbox for speech dataset construction (2021). arXiv:2104.04896"},{"key":"13_CR39","unstructured":"Bermuth, D., Poeppel, A., Reif, W.: Scribosermo: fast speech-to-text models for german and other languages (2021). arXiv:2110.07982"},{"key":"13_CR40","unstructured":"Sinha, Y., Siegert, I.: Improving the accuracy for voice-assistant conversations in German by combining different online ASR-API outputs. In: Human Perspectives on Spoken Human-Machine Interaction, pp. 11\u201316 (2021)"},{"key":"13_CR41","unstructured":"Lopez, A., Liesenfeld, A., Dingemanse, M.: Evaluation of automatic speech recognition for conversational speech in Dutch, English and German: what goes missing? In: Proceedings of the 18th Conference on Natural Language Processing (KONVENS 2022), 2022, pp. 135\u2013143"}],"container-title":["Lecture Notes in Networks and Systems","Intelligent Systems and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-47724-9_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,18]],"date-time":"2024-04-18T20:36:20Z","timestamp":1713472580000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-47724-9_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031477232","9783031477249"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-47724-9_13","relation":{},"ISSN":["2367-3370","2367-3389"],"issn-type":[{"type":"print","value":"2367-3370"},{"type":"electronic","value":"2367-3389"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"19 April 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}