{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T15:07:10Z","timestamp":1725808030678},"reference-count":45,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T00:00:00Z","timestamp":1673222400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T00:00:00Z","timestamp":1673222400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,1,9]]},"DOI":"10.1109\/slt54892.2023.10023346","type":"proceedings-article","created":{"date-parts":[[2023,1,27]],"date-time":"2023-01-27T13:54:03Z","timestamp":1674827643000},"page":"838-845","source":"Crossref","is-referenced-by-count":2,"title":["A Truly Multilingual First Pass and Monolingual Second Pass Streaming on-Device ASR System"],"prefix":"10.1109","volume":"abs 1706 3762","author":[{"given":"Sepand","family":"Mavandadi","sequence":"first","affiliation":[{"name":"Google LLC,USA"}]},{"given":"Bo","family":"Li","sequence":"additional","affiliation":[{"name":"Google LLC,USA"}]},{"given":"Chao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Google LLC,USA"}]},{"given":"Brian","family":"Farris","sequence":"additional","affiliation":[{"name":"Google LLC,USA"}]},{"given":"Tara N.","family":"Sainath","sequence":"additional","affiliation":[{"name":"Google LLC,USA"}]},{"given":"Trevor","family":"Strohman","sequence":"additional","affiliation":[{"name":"Google LLC,USA"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2017.8268935"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-212"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472618"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462105"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414920"},{"key":"ref14","first-page":"4835","article-title":"Joint etc-attention based end-to-end speech recognition using multi-task learning","author":"kim","year":"2017","journal-title":"ICASSP 2017"},{"key":"ref36","article-title":"Streaming end-to-end multilingual speech recognition with joint language identification","author":"haghani","year":"0","journal-title":"Interspeech 2022"},{"key":"ref31","first-page":"8112","article-title":"Improving the latency and quality of cascaded encoders","author":"tara","year":"2022","journal-title":"ICASSP 2022-2022"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414607"},{"key":"ref11","article-title":"End-to-end continuous speech recognition using attention-based recurrent nn: First re-sults","author":"chorowski","year":"2014","journal-title":"ArXiv Preprint"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413899"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"6645","DOI":"10.1109\/ICASSP.2013.6638947","article-title":"Speech recognition with deep recurrent neu-ral networks","author":"graves","year":"2013","journal-title":"2013 IEEE International Conference on Acoustics Speech and Signal Processing"},{"journal-title":"An efficient streaming non-recurrent on-device end-to-end model with improvements to rare-word modeling","year":"2021","author":"tara","key":"ref32"},{"journal-title":"Google Tensor is a milestone for machine learning","year":"0","key":"ref2"},{"journal-title":"NVIDIA-ACCELERATED DATA SCI-ENCE","year":"0","key":"ref1"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2846"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053600"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682336"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053205"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462201"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"265","DOI":"10.1109\/ASRU.2017.8268945","article-title":"Language independent end-to-end architecture for joint language identification and speech recognition","author":"watanabe","year":"2017","journal-title":"Automatic Speech Recognition and Understanding (ASRU) 2017 IEEE Workshop on"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2831"},{"key":"ref23","article-title":"Streaming end-to-end bilingual asr systems with joint language identi-fication","author":"punjabi","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref45","article-title":"Attention is all you need","volume":"abs 1706 3762","author":"vaswani","year":"2017","journal-title":"CoRR"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414379"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746594"},{"key":"ref20","first-page":"4904","article-title":"Mul-tilingual speech recognition with a single end-to-end model","author":"toshniwal","year":"2018","journal-title":"ICASSP 2018"},{"key":"ref42","article-title":"Lingvo: a modular and scalable framework for sequence-to-sequence modeling","author":"shen","year":"2019","journal-title":"ArXiv"},{"key":"ref41","article-title":"A structured self-attentive sentence embedding","author":"lin","year":"2017","journal-title":"5th International Conference on Learning Representations ICLR 2017"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003870"},{"key":"ref44","first-page":"4596","article-title":"Adafactor: Adaptive learning rates with sublinear memory cost","author":"shazeer","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10006"},{"journal-title":"Cloud Tensor Processing Units (TPUs","year":"0","key":"ref43"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2858"},{"key":"ref27","first-page":"6244","article-title":"Multi-dialect speech recognition in English using attention on ensem-ble of experts","author":"das","year":"2021","journal-title":"ICASSP 2021-2021"},{"key":"ref29","first-page":"6422","article-title":"A con-figurable multilingual model is all you need to recognize all languages","author":"zhou","year":"2022","journal-title":"ICASSP 2022-2022"},{"key":"ref8","article-title":"Accented speech recognition: A sur-vey","author":"hinsvark","year":"2021","journal-title":"ArXiv Preprint"},{"journal-title":"Multilingual People","year":"0","key":"ref7"},{"key":"ref9","first-page":"arxiv-1211","article-title":"Sequence transduction with recurrent neural networks","author":"graves","year":"2012","journal-title":"ArXiv e-prints"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2019.2908700"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"journal-title":"How many languages are there in the world?","year":"0","key":"ref6"},{"key":"ref5","article-title":"Recent advances in end-to-end auto-matic speech recognition","volume":"11","author":"li","year":"2021","journal-title":"APSIPA Transactions on Sig-nal and Information Processing"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413803"}],"event":{"name":"2022 IEEE Spoken Language Technology Workshop (SLT)","start":{"date-parts":[[2023,1,9]]},"location":"Doha, Qatar","end":{"date-parts":[[2023,1,12]]}},"container-title":["2022 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10022052\/10022330\/10023346.pdf?arnumber=10023346","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,20]],"date-time":"2023-02-20T17:08:24Z","timestamp":1676912904000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10023346\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1,9]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1109\/slt54892.2023.10023346","relation":{},"subject":[],"published":{"date-parts":[[2023,1,9]]}}}