{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,7,23]],"date-time":"2024-07-23T00:21:05Z","timestamp":1721694065976},"reference-count":23,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1007\/s10772-024-10112-w","type":"journal-article","created":{"date-parts":[[2024,6,9]],"date-time":"2024-06-09T09:01:34Z","timestamp":1717923694000},"page":"377-387","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Attentional multi-feature fusion for spoofing-aware speaker verification"],"prefix":"10.1007","volume":"27","author":[{"given":"Qian","family":"Shen","sequence":"first","affiliation":[]},{"given":"Mengxi","family":"Guo","sequence":"additional","affiliation":[]},{"given":"YiDa","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Jianfen","family":"Ma","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,6,9]]},"reference":[{"issue":"1","key":"10112_CR1","first-page":"119","volume":"11","author":"P Abdzadeh","year":"2023","unstructured":"Abdzadeh, P., & Veisi, H. (2023). A Comparison of CQT spectrogram with STFT-based Acoustic features in deep learning-based synthetic speech detection. Journal of AI and Data Mining, 11(1), 119\u2013129.","journal-title":"Journal of AI and Data Mining"},{"key":"10112_CR2","doi-asserted-by":"crossref","unstructured":"Chung, J. S., Nagrani, A., & Zisserman, A. (2018). Voxceleb2: Deep speaker recognition.","DOI":"10.21437\/Interspeech.2018-1929"},{"key":"10112_CR3","doi-asserted-by":"crossref","unstructured":"Dai, Z., Yang, Z., Yang, Y., Carbonell, J., Le, Q. V., & Salakhutdinov, R. (2019). Transformer-xl: Attentive language models beyond a fixed-length context.","DOI":"10.18653\/v1\/P19-1285"},{"key":"10112_CR4","doi-asserted-by":"crossref","unstructured":"Dai, Y., Gieseke, F., Oehmcke, S., Wu, Y., & Barnard, K. (2021). Attentional feature fusion. In Proceedings of the IEEE\/CVF winter conference on applications of computer vision (pp. 3560\u20133569).","DOI":"10.1109\/WACV48630.2021.00360"},{"key":"10112_CR5","doi-asserted-by":"publisher","unstructured":"Desplanques, B., Thienpondt, J., & Demuynck, K. (2020). Ecapa-tdnn: Emphasized channel attention, propagation and aggregation in tdnn based speaker verification. https:\/\/doi.org\/10.21437\/Interspeech.2020-2650.","DOI":"10.21437\/Interspeech.2020-2650"},{"key":"10112_CR6","doi-asserted-by":"crossref","unstructured":"Gao, Z., Song, Y., McLoughlin, I., Li, P., Jiang, Y., & Dai, L. R. (2019). Improving aggregation and loss function for better embedding learning in end-to-end speaker verification system. In Interspeech (pp. 361\u2013365).","DOI":"10.21437\/Interspeech.2019-1489"},{"key":"10112_CR7","doi-asserted-by":"crossref","unstructured":"Gulati, A., Qin, J., Chiu, C. C., Parmar, N., Zhang, Y., Yu, J., & Pang, R. (2020). Conformer: Convolution-augmented transformer for speech recognition.","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"10112_CR8","doi-asserted-by":"crossref","unstructured":"Heo, H. J., Shin, U. H., Lee, R., Cheon, Y., & Park, H. M. (2024, April). NeXt-TDNN: Modernizing multi-scale temporal convolution backbone for speaker verification. In ICASSP 2024\u20132024 IEEE international conference on acoustics, speech and signal processing (ICASSP 2024) (pp. 11186\u201311190). IEEE.","DOI":"10.1109\/ICASSP48485.2024.10447037"},{"key":"10112_CR9","doi-asserted-by":"crossref","unstructured":"Hou, Q., Zhou, D., & Feng, J. (2021). Coordinate attention for efficient mobile network design. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 13713\u201313722).","DOI":"10.1109\/CVPR46437.2021.01350"},{"key":"10112_CR10","doi-asserted-by":"crossref","unstructured":"Jung, J. W., Heo, H. S., Tak, H., Shim, H. J., Chung, J. S., Lee, B. J., & Evans, N. (2022). Aasist: Audio anti-spoofing using integrated spectro-temporal graph attention networks. In ICASSP 2022\u20132022 IEEE international conference on acoustics, speech and signal processing (ICASSP 2022) (pp. 6367\u20136371). IEEE.","DOI":"10.1109\/ICASSP43922.2022.9747766"},{"key":"10112_CR11","doi-asserted-by":"crossref","unstructured":"Jung, J. W., Tak, H., Shim, H. J., Heo, H. S., Lee, B. J., Chung, S. W.,& Kinnunen, T. (2022). Sasv challenge 2022: A spoofing aware speaker verification challenge evaluation plan.","DOI":"10.21437\/Interspeech.2022-11270"},{"key":"10112_CR12","doi-asserted-by":"crossref","unstructured":"Ko, T., Peddinti, V., Povey, D., Seltzer, M. L., & Khudanpur, S. (2017). A study on data augmentation of reverberant speech for robust speech recognition. In IEEE international conference on acoustics, speech and signal processing (ICASSP 2017) (pp. 5220\u20135224), March 5\u20139, 2017, New Orleans, USA. IEEE.","DOI":"10.1109\/ICASSP.2017.7953152"},{"key":"10112_CR13","doi-asserted-by":"crossref","unstructured":"Matejka, P., Novotn\u00fd, O., Plchot, O., Burget, L., S\u00e1nchez, M. D., & Cernock\u00fd, J. (2017). Analysis of score normalization in multilingual speaker recognition. In Interspeech (pp. 1567\u20131571).","DOI":"10.21437\/Interspeech.2017-803"},{"key":"10112_CR14","doi-asserted-by":"crossref","unstructured":"Nagrani, A., Chung, J. S., & Zisserman, A. (2017). Voxceleb: A large-scale speaker identification dataset.","DOI":"10.21437\/Interspeech.2017-950"},{"key":"10112_CR15","doi-asserted-by":"crossref","unstructured":"Tang, Y., Ding, G., Huang, J., He, X., & Zhou, B. (2019). Deep speaker embedding learning with multi-level pooling for text-independent speaker verification. In ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (pp. 6116\u20136120). IEEE.","DOI":"10.1109\/ICASSP.2019.8682712"},{"key":"10112_CR16","first-page":"283","volume":"2016","author":"M Todisco","year":"2016","unstructured":"Todisco, M., Delgado, H., & Evans, N. W. (2016). A new feature for automatic speaker verification anti-spoofing: Constant Q cepstral coefficients. In Odyssey, 2016, 283\u2013290.","journal-title":"In Odyssey"},{"key":"10112_CR17","unstructured":"Tworkowski, S., Staniszewski, K., Pacek, M., Wu, Y., Michalewski, H., & Mi\u0142o\u015b, P. (2024). Focused transformer: Contrastive training for context scaling. Advances in Neural Information Processing Systems, 36"},{"key":"10112_CR18","doi-asserted-by":"publisher","first-page":"2267","DOI":"10.1109\/TASLP.2022.3182856","volume":"30","author":"R Wang","year":"2022","unstructured":"Wang, R., Wei, Z., Duan, H., Ji, S., Long, Y., & Hong, Z. (2022). EfficientTDNN: Efficient architecture search for speaker recognition. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 30, 2267\u20132279.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"10112_CR19","doi-asserted-by":"publisher","first-page":"101114","DOI":"10.1016\/j.csl.2020.101114","volume":"64","author":"X Wang","year":"2020","unstructured":"Wang, X., Yamagishi, J., Todisco, M., Delgado, H., Nautsch, A., Evans, N., & Ling, Z. H. (2020). ASVspoof 2019: A large-scale public database of synthesized, converted and replayed speech. Computer Speech & Language, 64, 101114.","journal-title":"Computer Speech & Language"},{"key":"10112_CR20","unstructured":"Wang, Y., Nishizaki, H., & Li, M. (2023). Pretraining conformer with ASR or ASV for anti-spoofing countermeasure. Preprint retrieved from arXiv:2307.01546"},{"key":"10112_CR21","doi-asserted-by":"publisher","first-page":"121361","DOI":"10.1016\/j.eswa.2023.121361","volume":"237","author":"T Xie","year":"2024","unstructured":"Xie, T., Dai, K., Wang, K., Li, R., & Zhao, L. (2024). Deepmatcher: A deep transformer-based network for robust and accurate local feature matching. Expert Systems with Applications, 237, 121361.","journal-title":"Expert Systems with Applications"},{"key":"10112_CR22","doi-asserted-by":"publisher","first-page":"102622","DOI":"10.1016\/j.dsp.2019.102622","volume":"97","author":"J Yang","year":"2020","unstructured":"Yang, J., & Das, R. K. (2020). Long-term high frequency features for synthetic speech detection. Digital Signal Processing, 97, 102622.","journal-title":"Digital Signal Processing"},{"key":"10112_CR23","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Lv, Z., Wu, H., Zhang, S., Hu, P., Wu, Z., & Meng, H. (2022). Mfa-conformer: Multi-scale feature aggregation conformer for automatic speaker verification.","DOI":"10.21437\/Interspeech.2022-563"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-024-10112-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-024-10112-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-024-10112-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,22]],"date-time":"2024-07-22T16:06:57Z","timestamp":1721664417000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-024-10112-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6]]},"references-count":23,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2024,6]]}},"alternative-id":["10112"],"URL":"https:\/\/doi.org\/10.1007\/s10772-024-10112-w","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,6]]},"assertion":[{"value":"27 December 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 May 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 June 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}