{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T13:50:41Z","timestamp":1725717041180},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T00:00:00Z","timestamp":1673222400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T00:00:00Z","timestamp":1673222400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,1,9]]},"DOI":"10.1109\/slt54892.2023.10023016","type":"proceedings-article","created":{"date-parts":[[2023,1,27]],"date-time":"2023-01-27T13:54:03Z","timestamp":1674827643000},"page":"38-45","source":"Crossref","is-referenced-by-count":1,"title":["Flickering Reduction with Partial Hypothesis Reranking for Streaming ASR"],"prefix":"10.1109","author":[{"given":"Antoine","family":"Bruguier","sequence":"first","affiliation":[{"name":"Google LLC,USA"}]},{"given":"David","family":"Qiu","sequence":"additional","affiliation":[{"name":"Google LLC,USA"}]},{"given":"Trevor","family":"Strohman","sequence":"additional","affiliation":[{"name":"Google LLC,USA"}]},{"given":"Yanzhang","family":"He","sequence":"additional","affiliation":[{"name":"Google LLC,USA"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.3758\/s13414-020-02237-2"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414607"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.3115\/1620754.1620810"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"journal-title":"Synchronization accessibility user requirements","year":"2022","author":"noble","key":"ref15"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1097-0193(1997)5:4<287::AID-HBM14>3.0.CO;2-B"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10551"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-206"},{"journal-title":"Contains two videos from the same utterance from Librispeech The file libri_normal_speed_wi t h iaud i o mp4 is the play-back at regular speed with the audio Timing information is provided as second and frame number (with 30 frame per seconds) The top row is at the base the middle row is our proposed algorithm with ? = 0 2 and the bottom row the comparison with partial delay with PEI = 300ms The file libri_slow_mo_silent mp4 is the exact same information slowed down 10 times and without the audio","article-title":"Supplementary material","year":"0","key":"ref11"},{"key":"ref33","article-title":"Recog-nizing long-form speech using streaming end-to-end models","author":"narayanan","year":"2019","journal-title":"ASRU"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.216"},{"key":"ref32","article-title":"Tied & reduced rnn-t de-coder","author":"botros","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref2","article-title":"Stability and accuracy in incremental speech recog-nition","author":"selfridge","year":"2011","journal-title":"SIGdial"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1982.1171441"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953069"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-5779-3"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003906"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-496"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2012-301"},{"key":"ref26","article-title":"Fastemit: Low-latency streaming asr with sequence-level emission regu-larization","author":"yu","year":"2021","journal-title":"CASSP"},{"journal-title":"Introduction to Algorithms","year":"2001","author":"cormen","key":"ref25"},{"key":"ref20","article-title":"Transformer-transducer: End-to-end speech recognition with self-attention","author":"yeh","year":"2019","journal-title":"CoRR"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1194"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054188"},{"key":"ref28","article-title":"RNN-T for latency controlled ASR with improved beam search","author":"jain","year":"2019","journal-title":"CoRR"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413899"},{"key":"ref29","article-title":"A simple, fast di-verse decoding algorithm for neural generation","author":"li","year":"2016","journal-title":"ArXiv"},{"key":"ref8","article-title":"Re-translation strate-gies for long form, simultaneous, spoken language translation","author":"arivazhagan","year":"2020","journal-title":"ICASSP"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/ICSLP.2002-519"},{"key":"ref9","article-title":"Streaming cascade-based speech translation leveraged by a direct segmentation model","author":"iranzo-sanchez","year":"2020","journal-title":"EMNLP"},{"key":"ref4","article-title":"In-cremental dialogue system faster than and preferred to its non-incremental counterpart","author":"aist","year":"2007","journal-title":"DECALOG"},{"key":"ref3","article-title":"Incremental speech recognition for multimodal in-terfaces","author":"fink","year":"1998","journal-title":"InterSpeech"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682336"},{"key":"ref5","article-title":"Evaluation of the kit lecture translation system","author":"muller","year":"2016","journal-title":"LREC"}],"event":{"name":"2022 IEEE Spoken Language Technology Workshop (SLT)","start":{"date-parts":[[2023,1,9]]},"location":"Doha, Qatar","end":{"date-parts":[[2023,1,12]]}},"container-title":["2022 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10022052\/10022330\/10023016.pdf?arnumber=10023016","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,20]],"date-time":"2023-02-20T17:08:46Z","timestamp":1676912926000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10023016\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1,9]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/slt54892.2023.10023016","relation":{},"subject":[],"published":{"date-parts":[[2023,1,9]]}}}