{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T15:07:34Z","timestamp":1725808054281},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,12,13]],"date-time":"2021-12-13T00:00:00Z","timestamp":1639353600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,12,13]],"date-time":"2021-12-13T00:00:00Z","timestamp":1639353600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,12,13]]},"DOI":"10.1109\/asru51503.2021.9688238","type":"proceedings-article","created":{"date-parts":[[2022,2,3]],"date-time":"2022-02-03T20:31:00Z","timestamp":1643920260000},"page":"328-334","source":"Crossref","is-referenced-by-count":9,"title":["Boundary and Context Aware Training for CIF-Based Non-Autoregressive End-to-End ASR"],"prefix":"10.1109","author":[{"given":"Fan","family":"Yu","sequence":"first","affiliation":[{"name":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University,Xi'an,China"}]},{"given":"Haoneng","family":"Luo","sequence":"additional","affiliation":[{"name":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University,Xi'an,China"}]},{"given":"Pengcheng","family":"Guo","sequence":"additional","affiliation":[{"name":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University,Xi'an,China"}]},{"given":"Yuhao","family":"Liang","sequence":"additional","affiliation":[{"name":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University,Xi'an,China"}]},{"given":"Zhuoyuan","family":"Yao","sequence":"additional","affiliation":[{"name":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University,Xi'an,China"}]},{"given":"Lei","family":"Xie","sequence":"additional","affiliation":[{"name":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University,Xi'an,China"}]},{"given":"Yingying","family":"Gao","sequence":"additional","affiliation":[{"name":"China Mobile Research Institute"}]},{"given":"Leijing","family":"Hou","sequence":"additional","affiliation":[{"name":"China Mobile Research Institute"}]},{"given":"Shilei","family":"Zhang","sequence":"additional","affiliation":[{"name":"China Mobile Research Institute"}]}],"member":"263","reference":[{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1619"},{"key":"ref30","first-page":"2207","article-title":"ESPnet: End-to-End speech processing toolkit","author":"watanabe","year":"0","journal-title":"Proc INTERSPEECH"},{"journal-title":"Proc ICLR","article-title":"Non-autoregressive neural machine translation","year":"0","author":"jiatao","key":"ref10"},{"key":"ref11","first-page":"4273","article-title":"FlowSeq: Non-autoregressive conditional sequence generation with generative flow","author":"ma","year":"0","journal-title":"Proc EMNLP-IJCNLP"},{"key":"ref12","first-page":"3381","article-title":"Listen attentively, and spell once: Whole sentence generation via a non-autoregressive architecture for low-latency speech recognition","author":"ye","year":"2020","journal-title":"Proc INTERSPEECH"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref14","first-page":"3735","article-title":"Sequence to multi-sequence learning via conditional chain mapping for mixture signals","author":"shi","year":"0","journal-title":"Proc NeurIPS"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-2155"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2086"},{"key":"ref17","first-page":"3655","article-title":"Mask CTC: Non-autoregressive end-to-end ASR with CTC and mask predict","author":"higuchi","year":"2020","journal-title":"Proc INTERSPEECH"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414198"},{"key":"ref19","first-page":"1403","article-title":"Imputer: Sequence modelling via imputation and dynamic programming","author":"chan","year":"2020","journal-title":"Proc ICML"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462105"},{"journal-title":"Proc ICLR","article-title":"Adam: A method for stochastic optimization","year":"0","author":"kingma","key":"ref27"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953075"},{"key":"ref6","first-page":"5884","article-title":"Speech-Transformer: A no-recurrence sequence-to-sequence model for speech recognition","author":"dong","year":"2018","journal-title":"Proc ICASSP"},{"key":"ref29","first-page":"3586","article-title":"Audio augmentation for speech recognition","author":"ko","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref5","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"0","journal-title":"Proc NeurIPS"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-595"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2015-647"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1149"},{"key":"ref1","first-page":"577","article-title":"Attention-based models for speech recognition","author":"chorowski","year":"0","journal-title":"Proc NIPS"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413429"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054250"},{"key":"ref24","first-page":"1","article-title":"Aishell-1: An open-source mandarin speech corpus and a speech recognition baseline","author":"hui","year":"0","journal-title":"Proc O-COCOSDA"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414858"},{"journal-title":"ArXiv Preprint","article-title":"Aishell-2: Transforming Mandarin ASR research into industrial scale","year":"2018","author":"du","key":"ref26"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383506"}],"event":{"name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","start":{"date-parts":[[2021,12,13]]},"location":"Cartagena, Colombia","end":{"date-parts":[[2021,12,17]]}},"container-title":["2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9687821\/9687855\/09688238.pdf?arnumber=9688238","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,16]],"date-time":"2022-05-16T20:41:20Z","timestamp":1652733680000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9688238\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12,13]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/asru51503.2021.9688238","relation":{},"subject":[],"published":{"date-parts":[[2021,12,13]]}}}