{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,7,17]],"date-time":"2024-07-17T00:19:16Z","timestamp":1721175556833},"reference-count":54,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2024,3,19]],"date-time":"2024-03-19T00:00:00Z","timestamp":1710806400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,3,19]],"date-time":"2024-03-19T00:00:00Z","timestamp":1710806400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2024,7]]},"DOI":"10.1007\/s00034-024-02641-1","type":"journal-article","created":{"date-parts":[[2024,3,19]],"date-time":"2024-03-19T19:02:47Z","timestamp":1710874967000},"page":"4239-4271","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Deep Convolutional Neural Networks for Predominant Instrument Recognition in Polyphonic Music Using Discrete Wavelet Transform"],"prefix":"10.1007","volume":"43","author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-5697-4651","authenticated-orcid":false,"given":"Sukanta Kumar","family":"Dash","sequence":"first","affiliation":[]},{"given":"S. S.","family":"Solanki","sequence":"additional","affiliation":[]},{"given":"Soubhik","family":"Chakraborty","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,3,19]]},"reference":[{"key":"2641_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1177\/1550147720911009","volume":"16","author":"A al-Qerem","year":"2020","unstructured":"A. al-Qerem, F. Kharbat, S. Nashwan, S. Ashraf, K. Blaou, General model for best feature extraction of EEG using discrete wavelet transform wavelet family and differential evolution. Int. J. Distrib. Sens. Netw. 16, 1\u201321 (2020). https:\/\/doi.org\/10.1177\/1550147720911009","journal-title":"Int. J. Distrib. Sens. Netw."},{"key":"2641_CR2","doi-asserted-by":"publisher","first-page":"89781","DOI":"10.1109\/access.2022.3198988","volume":"10","author":"K Alsharabi","year":"2022","unstructured":"K. Alsharabi, Y.B. Salamah, A.M. Abdurraqeeb, M. Aljalal, F.A. Alturki, EEG signal processing for Alzheimer\u2019s disorders using discrete wavelet transform and machine learning approaches. IEEE Access 10, 89781\u201389797 (2022). https:\/\/doi.org\/10.1109\/access.2022.3198988","journal-title":"IEEE Access"},{"key":"2641_CR3","unstructured":"J.J. Aucouturier, Sounds like teen spirit: Computational insights into the grounding of everyday musical terms, in Language, Evolution and the Brain, Book Chapter-2 (City University of Hong Kong Press, 2009), pp. 35\u201364"},{"issue":"3","key":"2641_CR4","doi-asserted-by":"publisher","first-page":"407","DOI":"10.1007\/s10844-013-0258-3","volume":"41","author":"E Benetos","year":"2013","unstructured":"E. Benetos, S. Dixon, D. Giannoulis, H. Kirchhoff, A. Klapuri, Automatic music transcription: challenges and future directions. J. Intell. Inf. Syst. 41(3), 407\u2013434 (2013). https:\/\/doi.org\/10.1007\/s10844-013-0258-3","journal-title":"J. Intell. Inf. Syst."},{"key":"2641_CR5","doi-asserted-by":"publisher","unstructured":"J.J. Bosch, J. Janer, F. Fuhrmann, P. Herrera, A comparison of sound segregation techniques for predominant instrument recognition in musical audio signals, in Proceedings, International Society for Music Information Retrieval Conference (ISMIR 2012) (2012), pp. 559\u2013564. https:\/\/doi.org\/10.5281\/zenodo.1416075","DOI":"10.5281\/zenodo.1416075"},{"issue":"4","key":"2641_CR6","doi-asserted-by":"publisher","first-page":"68","DOI":"10.1063\/1.1580056","volume":"56","author":"L Debnath","year":"2003","unstructured":"L. Debnath, J.-P. Antoine, Wavelet transforms and their applications. Phys. Today 56(4), 68\u201368 (2003). https:\/\/doi.org\/10.1063\/1.1580056","journal-title":"Phys. Today"},{"issue":"2","key":"2641_CR7","doi-asserted-by":"publisher","first-page":"429","DOI":"10.1109\/tsmcb.2007.913394","volume":"38","author":"JD Deng","year":"2008","unstructured":"J.D. Deng, C. Simmermacher, S. Cranefield, A study on feature analysis for musical instrument classification. IEEE Trans. Syst. Man Cybern. Part B (Cybern.) 38(2), 429\u2013438 (2008). https:\/\/doi.org\/10.1109\/tsmcb.2007.913394","journal-title":"IEEE Trans. Syst. Man Cybern. Part B (Cybern.)"},{"key":"2641_CR8","doi-asserted-by":"publisher","unstructured":"Z. Duan, B. Pardo, L. Daudet, A novel Cepstral representation for timbre modeling of sound sources in polyphonic mixtures, in Proceedings, IEEE International Conference on Acoustic, Speech and Signal Processing (ICASSP) (2014), pp. 7495\u20137499. https:\/\/doi.org\/10.1109\/icassp.2014.6855057","DOI":"10.1109\/icassp.2014.6855057"},{"key":"2641_CR9","doi-asserted-by":"publisher","unstructured":"R.C. Eberhart, Y. Shi, Particle swarm optimization: development, applications and resources, in Proceedings, IEEE Conference on Evolutionary Computation, (IEEE Cat. No.01TH8546), ICEC, vol. 1 (2001), pp. 81\u201386. https:\/\/doi.org\/10.1109\/cec.2001.934374","DOI":"10.1109\/cec.2001.934374"},{"issue":"2","key":"2641_CR10","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1109\/tasl.2007.908128","volume":"16","author":"MR Every","year":"2008","unstructured":"M.R. Every, Discriminating between pitched sources in music audio. IEEE Trans. Audio Speech Lang. Process. 16(2), 267\u2013277 (2008). https:\/\/doi.org\/10.1109\/tasl.2007.908128","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"2641_CR11","unstructured":"F. Fuhrmann, P. Herrera, Polyphonic instrument recognition for exploring semantic similarities in music, in Proceedings, 13th International Conference on Digital Audio Effects (DAFx-10) (2010), pp. 1\u20138. http:\/\/mtg.upf.edu\/files\/publications\/ffuhrmann_ dafx10_ final_0.pdf"},{"key":"2641_CR12","doi-asserted-by":"publisher","unstructured":"D. Ghosal, M.H. Kolekar, Music genre recognition using deep neural networks and transfer learning, in Proceedings, Interspeech (2018), pp. 2087\u20132091. https:\/\/doi.org\/10.21437\/interspeech.2018-2045","DOI":"10.21437\/interspeech.2018-2045"},{"issue":"9","key":"2641_CR13","doi-asserted-by":"publisher","first-page":"1805","DOI":"10.1109\/tasl.2013.2248720","volume":"21","author":"D Giannoulis","year":"2013","unstructured":"D. Giannoulis, A. Klapuri, Musical instrument recognition in polyphonic audio using missing feature approach. IEEE Trans. Audio Speech Lang. Process. 21(9), 1805\u20131817 (2013). https:\/\/doi.org\/10.1109\/tasl.2013.2248720","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"2641_CR14","unstructured":"X. Glorot, Y. Bengio, Understanding the difficulty of training deep feedforward neural networks, in Proceedings, 13th International Conference on Artificial Intelligence and Statistics (AISTATS), vol. 9, Chia Laguna Resort, Sardinia, Italy (2010), pp. 249\u2013256. https:\/\/proceedings.mlr.press\/v9\/glorot10a\/glorot10a.pdf"},{"key":"2641_CR15","unstructured":"M. Goto, H. Hashiguchi, T. Nishimura, R. Oka, RWC music database: popular, classical, and jazz music database, in Proceedings, 3rd International Conference on Music Information Retrieval (ISMIR) (2002), pp. 287\u2013288. https:\/\/www.researchgate.net\/publication\/220723431"},{"key":"2641_CR16","unstructured":"S. Gururani, C. Summers, A. Lerch, Instrument activity detection in polyphonic music using deep neural networks, in Proceedings, International Society for Music Information Retrieval Conference, Paris, France (2018), pp. 569\u2013576. https:\/\/www.researchgate.net\/publication\/ 332621784"},{"issue":"1","key":"2641_CR17","doi-asserted-by":"publisher","first-page":"208","DOI":"10.1109\/taslp.2016.2632307","volume":"25","author":"Y Han","year":"2016","unstructured":"Y. Han, J. Kim, K. Lee, Deep convolutional neural networks for predominant instrument recognition in polyphonic music. IEEE\/ACM Trans. Audio. Speech Lang. Process. 25(1), 208\u2013221 (2016). https:\/\/doi.org\/10.1109\/taslp.2016.2632307","journal-title":"IEEE\/ACM Trans. Audio. Speech Lang. Process."},{"key":"2641_CR18","doi-asserted-by":"publisher","unstructured":"K.K. Hasan, U.K. Ngah, M.F.M. Salleh, Multilevel decomposition discrete wavelet transform for hardware image compression architectures applications, in Proceedings, IEEE International Conference on Control System, Computing and Engineering, Penang, Malaysia (2013), pp. 315\u2013320. https:\/\/doi.org\/10.1109\/iccsce.2013.6719981","DOI":"10.1109\/iccsce.2013.6719981"},{"key":"2641_CR19","unstructured":"T. Heittola, A. Klapuri, T. Virtanen, Musical instrument recognition in polyphonic audio using source-filter model for sound separation, in Proceedings, International Society for Music Information Retrieval Conference (ISMIR) (2009), pp. 327\u2013332. https:\/\/www.researchgate.net\/publication\/220723588"},{"key":"2641_CR20","doi-asserted-by":"publisher","unstructured":"J. Huang, Y. Dong, J. Liu, C. Dong, H. Wang, Sports audio segmentation and classification, in Proceedings, International Conference on Network Infrastructure and Digital Content (IC-NIDC ?09) (IEEE, Beijing, China, 2009), pp. 379\u2013383. https:\/\/doi.org\/10.1109\/icnidc.2009.5360872","DOI":"10.1109\/icnidc.2009.5360872"},{"key":"2641_CR21","doi-asserted-by":"publisher","unstructured":"R.T. Irene, C. Borrelli, M. Zanoni, M. Buccoli, A. Sarti, Automatic playlist generation using convolutional neural networks and recurrent neural networks, in Proceedings, European Signal Processing Conference (EUSIPCO) (IEEE, 2019), pp. 1\u20135. https:\/\/doi.org\/10.23919\/eusipco.2019.8903002","DOI":"10.23919\/eusipco.2019.8903002"},{"key":"2641_CR22","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1155\/2007\/51979","volume":"2007","author":"T Kitahara","year":"2007","unstructured":"T. Kitahara, M. Goto, K. Komatani, T. Ogata, H.G. Okuno, Instrument identification in polyphonic music: feature weighting to minimize influence of sound overlaps. J. Appl. Signal Process. (EURASIP) 2007, 155\u2013155 (2007). https:\/\/doi.org\/10.1155\/2007\/51979","journal-title":"J. Appl. Signal Process. (EURASIP)"},{"issue":"7553","key":"2641_CR23","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"Y. LeCun, Y. Bengio, G. Hinton, Deep learning. Nature 521(7553), 436\u2013444 (2015). https:\/\/doi.org\/10.1038\/nature14539","journal-title":"Nature"},{"issue":"6","key":"2641_CR24","doi-asserted-by":"publisher","first-page":"3464","DOI":"10.1007\/s00034-022-02278-y","volume":"42","author":"CR Lekshmi","year":"2023","unstructured":"C.R. Lekshmi, R. Rajeev, Multiple predominant instruments recognition in polyphonic music using spectro\/modgd-gram fusion. Circuits Syst. Signal Process. 42(6), 3464\u20133484 (2023). https:\/\/doi.org\/10.1007\/s00034-022-02278-y","journal-title":"Circuits Syst. Signal Process."},{"key":"2641_CR25","doi-asserted-by":"publisher","unstructured":"P. Li, J. Qian, T. Wang, Automatic instrument recognition in polyphonic music using convolutional neural networks (2015), pp. 1\u20135. https:\/\/doi.org\/10.48550\/arXiv.1511.05520. arXiv:1511.05520","DOI":"10.48550\/arXiv.1511.05520"},{"issue":"2","key":"2641_CR26","doi-asserted-by":"publisher","first-page":"790","DOI":"10.1109\/tii.2017.2739340","volume":"14","author":"P Li","year":"2018","unstructured":"P. Li, Z. Chen, L.T. Yang, Q. Zhang, M.J. Deen, Deep convolutional computation model for feature learning on big data in Internet of Things. IEEE Trans. Ind. Inf. 14(2), 790\u2013798 (2018). https:\/\/doi.org\/10.1109\/tii.2017.2739340","journal-title":"IEEE Trans. Ind. Inf."},{"issue":"8","key":"2641_CR27","doi-asserted-by":"publisher","first-page":"1256","DOI":"10.1109\/taslp.2019.2915167","volume":"27","author":"Y Luo","year":"2019","unstructured":"Y. Luo, N. Mesgarani, Conv-tasnet: surpassing ideal time-frequency magnitude masking for speech separation. IEEE\/ACM Trans. Audio Speech Lang. Process. 27(8), 1256\u20131266 (2019). https:\/\/doi.org\/10.1109\/taslp.2019.2915167","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"issue":"1","key":"2641_CR28","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1016\/j.amc.2007.10.069","volume":"207","author":"E Magosso","year":"2009","unstructured":"E. Magosso, M. Ursino, A. Zaniboni, E. Gardella, A wavelet-based energetic approach for the analysis of biomedical signals: application to the electroencephalogram and electro-oculogram. Appl. Math. Comput. 207(1), 42\u201362 (2009). https:\/\/doi.org\/10.1016\/j.amc.2007.10.069","journal-title":"Appl. Math. Comput."},{"key":"2641_CR29","doi-asserted-by":"publisher","unstructured":"B. McFee, C. Raffel, D. Liang, D.P.W. Ellis, M. McVicar, E. Battenberg, O. Nieto, Librosa: audio and music signal analysis in Python, in Proceedings, 14th Python in Science Conference (SCIPY 2015), vol. 8 (2015), pp. 18\u201325. https:\/\/doi.org\/10.25080\/majora-7b98e3ed-003","DOI":"10.25080\/majora-7b98e3ed-003"},{"key":"2641_CR30","unstructured":"V. Nair, G.E. Hinton, Rectified linear units improve restricted Boltzmann machines, in Proceedings, 27th International Conference on Machine Learning, Haifa, Israel (2010), pp. 807\u2013814. https:\/\/www.cs.toronto.edu\/~fritz\/absps\/reluICML.pdf"},{"key":"2641_CR31","doi-asserted-by":"publisher","first-page":"208","DOI":"10.1016\/j.neunet.2019.06.010","volume":"118","author":"T-L Nguyen","year":"2019","unstructured":"T.-L. Nguyen, S. Kavuri, M. Lee, A multimodal convolutional neuro-fuzzy network for emotional understanding of movie clips. Neural Netw. 118, 208\u2013219 (2019). https:\/\/doi.org\/10.1016\/j.neunet.2019.06.010","journal-title":"Neural Netw."},{"key":"2641_CR32","unstructured":"[Online]. Available: http:\/\/theremin.music.uiowa.edu\/MIS.html"},{"key":"2641_CR33","unstructured":"F.J. Opolko, J. Wapnick, Mcgill University master samples. Montreal, QC, Canada: McGill University, Faculty of Music (1987). https:\/\/www.worldcat.org\/title\/mums-mcgill-university-master-samples\/oclc\/17946083"},{"key":"2641_CR34","doi-asserted-by":"publisher","unstructured":"J. Pons, O. Slizovskaia, R. Gong, E. Gomez, X. Serra, Timbre analysis of music audio signals with convolutional neural networks, in Proceedings, 25th European Signal Processing Conference (IEEE, 2017), pp. 2744\u20132748. https:\/\/doi.org\/10.23919\/eusipco.2017.8081710","DOI":"10.23919\/eusipco.2017.8081710"},{"key":"2641_CR35","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1007\/978-3-642-35289-8_5","volume-title":"Neural Networks: Tricks of the Trade. Lecture Notes in Computer Science","author":"L Prechelt","year":"2012","unstructured":"L. Prechelt, Early stopping\u2014but when?, in Neural Networks: Tricks of the Trade. Lecture Notes in Computer Science, vol. 7700, ed. by G.B. Orr, K.R. Muller (Springer, Berlin, 2012), pp.53\u201367. https:\/\/doi.org\/10.1007\/978-3-642-35289-8_5"},{"issue":"2","key":"2641_CR36","doi-asserted-by":"publisher","first-page":"206","DOI":"10.1109\/jstsp.2019.2908700","volume":"13","author":"H Purwins","year":"2019","unstructured":"H. Purwins, B. Li, T. Virtanen, J. Schluter, S.-Y. Chang, T. Sainath, Deep learning for audio signal processing. IEEE J. Sel. Top. Signal process 13(2), 206\u2013219 (2019). https:\/\/doi.org\/10.1109\/jstsp.2019.2908700","journal-title":"IEEE J. Sel. Top. Signal process"},{"issue":"5","key":"2641_CR37","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3390\/math9050530","volume":"9","author":"L Qiu","year":"2021","unstructured":"L. Qiu, S. Li, Y. Sung, DBTMPE: deep bidirectional transformers-based masked predictive encoder approach for music genre classification. Mathematics 9(5), 1\u201317 (2021). https:\/\/doi.org\/10.3390\/math9050530","journal-title":"Mathematics"},{"key":"2641_CR38","volume-title":"Theory and Applications of Digital Speech Processing","author":"LR Rabiner","year":"2010","unstructured":"L.R. Rabiner, R.W. Schafer, Theory and Applications of Digital Speech Processing (Prentice Hall Press, Hoboken, 2010)"},{"issue":"1","key":"2641_CR39","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13636-022-00245-8","volume":"2022","author":"LC Reghunath","year":"2022","unstructured":"L.C. Reghunath, R. Rajan, Transformer-based ensemble method for multiple predominant instruments recognition in polyphonic music. EURASIP J. Audio Speech Music Process. 2022(1), 1\u201314 (2022). https:\/\/doi.org\/10.1186\/s13636-022-00245-8","journal-title":"EURASIP J. Audio Speech Music Process."},{"issue":"4","key":"2641_CR40","doi-asserted-by":"publisher","first-page":"1607","DOI":"10.1109\/jbhi.2018.2867619","volume":"23","author":"A Sano","year":"2019","unstructured":"A. Sano, W. Chen, D. Lopez-Martinez, S. Taylor, R.W. Picard, Multimodal ambulatory sleep detection using LSTM recurrent neural networks. IEEE J. Biomed. Health Inform. 23(4), 1607\u20131617 (2019). https:\/\/doi.org\/10.1109\/jbhi.2018.2867619","journal-title":"IEEE J. Biomed. Health Inform."},{"key":"2641_CR41","doi-asserted-by":"publisher","first-page":"1276","DOI":"10.1109\/taslp.2023.3252272","volume":"31","author":"K Schulze-Forster","year":"2023","unstructured":"K. Schulze-Forster, K.G. Richard, L. Kelley, C.S.J. Doire, R. Badeau, Unsupervised music source separation using differentiable parametric source models. IEEE\/ACM Trans. Audio Speech Lang. Process. 31, 1276\u20131289 (2023). https:\/\/doi.org\/10.1109\/taslp.2023.3252272","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2641_CR42","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1016\/j.patrec.2017.03.023","volume":"94","author":"M Sharma","year":"2017","unstructured":"M. Sharma, R.B. Pachori, U.R. Acharya, A new approach to characterize epileptic seizures using analytic time-frequency flexible wavelet transform and fractal dimension. Pattern Recogn. Lett. 94, 172\u2013179 (2017). https:\/\/doi.org\/10.1016\/j.patrec.2017.03.023","journal-title":"Pattern Recogn. Lett."},{"issue":"1","key":"2641_CR43","doi-asserted-by":"publisher","first-page":"308","DOI":"10.1109\/jbhi.2022.3210996","volume":"27","author":"L Shi","year":"2023","unstructured":"L. Shi, Y. Zhang, J. Zhang, Lung sound recognition method based on wavelet feature enhancement and time-frequency synchronous modeling. IEEE J. Biomed. Health Inform. 27(1), 308\u2013318 (2023). https:\/\/doi.org\/10.1109\/jbhi.2022.3210996","journal-title":"IEEE J. Biomed. Health Inform."},{"issue":"10","key":"2641_CR44","doi-asserted-by":"publisher","first-page":"1733","DOI":"10.1109\/tmm.2015.2428998","volume":"17","author":"D Stowell","year":"2015","unstructured":"D. Stowell, D. Giannoulis, E. Benetos, M. Lagrange, M.D. Plumbley, Detection and classification of acoustic scenes and events. IEEE Trans. Multimed. 17(10), 1733\u20131746 (2015). https:\/\/doi.org\/10.1109\/tmm.2015.2428998","journal-title":"IEEE Trans. Multimed."},{"key":"2641_CR45","doi-asserted-by":"publisher","unstructured":"M. Sukhavasi, S. Adapa, Music theme recognition using CNN and self-attention (2019). https:\/\/doi.org\/10.48550\/arXiv.1911.07041, arXiv preprint arXiv:1911.07041","DOI":"10.48550\/arXiv.1911.07041"},{"key":"2641_CR46","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.bspc.2020.101872","volume":"58","author":"T Tuncer","year":"2020","unstructured":"T. Tuncer, S. Dogan, A. Subasi, Surface EMG signal classification using ternary pattern and discrete wavelet transform based feature extraction for hand movement recognition. Biomed. Signal Process. Control 58, 1\u201312 (2020). https:\/\/doi.org\/10.1016\/j.bspc.2020.101872","journal-title":"Biomed. Signal Process. Control"},{"key":"2641_CR47","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.bspc.2021.102591","volume":"68","author":"T Tuncer","year":"2021","unstructured":"T. Tuncer, S. Dogan, A. Subasi, EEG-based driving fatigue detection using multilevel feature extraction and iterative hybrid feature selection. Biomed. Signal Process. Control 68, 1\u201311 (2021). https:\/\/doi.org\/10.1016\/j.bspc.2021.102591","journal-title":"Biomed. Signal Process. Control"},{"key":"2641_CR48","doi-asserted-by":"publisher","first-page":"2245","DOI":"10.1007\/s00371-022-02406-4","volume":"39","author":"SP Vaidya","year":"2022","unstructured":"S.P. Vaidya, Fingerprint-based robust medical image watermarking in hybrid transform. Vis. Comput. 39, 2245\u20132260 (2022). https:\/\/doi.org\/10.1007\/s00371-022-02406-4","journal-title":"Vis. Comput."},{"issue":"8","key":"2641_CR49","doi-asserted-by":"publisher","first-page":"1336","DOI":"10.1109\/taslp.2017.2738443","volume":"26","author":"C-Y Wang","year":"2018","unstructured":"C.-Y. Wang, J.C. Wang, A. Santoso, C.C. Chiang, C.H. Wu, Sound event recognition using auditory-receptive-field binary pattern and hierarchical-diving deep belief network. IEEE\/ACM Trans. Audio Speech Lang. Process. 26(8), 1336\u20131351 (2018). https:\/\/doi.org\/10.1109\/taslp.2017.2738443","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2641_CR50","unstructured":"Wikipedia contributors. Mel-frequency cepstrum\u2014Wikipedia, the free encyclopedia (2019). https:\/\/en.wikipedia.org\/w\/index.php?title=Mel-frequency_cepstrum &oldid=917928298"},{"issue":"6","key":"2641_CR51","doi-asserted-by":"publisher","first-page":"1124","DOI":"10.1109\/jstsp.2011.2158064","volume":"5","author":"J Wu","year":"2011","unstructured":"J. Wu, E. Vincent, S.A. Raczynski, T. Nishimoto, N. Ono, S. Sagayama, Polyphonic pitch estimation and instrument identification by joint modeling of sustained and attack sounds. IEEE J. Sel. Top. Signal Process. 5(6), 1124\u20131132 (2011). https:\/\/doi.org\/10.1109\/jstsp.2011.2158064","journal-title":"IEEE J. Sel. Top. Signal Process."},{"issue":"2","key":"2641_CR52","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1109\/msp.2006.1621449","volume":"23","author":"X Wu","year":"2006","unstructured":"X. Wu, C.-W. Ngo, Q. Li, Threading and auto documenting news videos: a promising solution to rapidly browse news topics. IEEE Signal Process. Mag. 23(2), 59\u201368 (2006). https:\/\/doi.org\/10.1109\/msp.2006.1621449","journal-title":"IEEE Signal Process. Mag."},{"key":"2641_CR53","doi-asserted-by":"publisher","first-page":"852","DOI":"10.1109\/taslp.2020.2971419","volume":"28","author":"D Yu","year":"2020","unstructured":"D. Yu, H. Duan, J. Fang, B. Zeng, Predominant instrument recognition based on deep neural network with auxiliary classification. IEEE\/ACM Trans. Audio Speech Lang. Process. 28, 852\u2013861 (2020). https:\/\/doi.org\/10.1109\/taslp.2020.2971419","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2641_CR54","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.micpro.2021.104134","volume":"84","author":"N Zermi","year":"2021","unstructured":"N. Zermi, A. Khaldi, M.R. Kafi, F. Kahlessenane, S. Euschi, Robust SVD-based schemes for medical image watermarking. Microprocess. Microsyst. 84, 1\u201312 (2021). https:\/\/doi.org\/10.1016\/j.micpro.2021.104134","journal-title":"Microprocess. Microsyst."}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-024-02641-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-024-02641-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-024-02641-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,16]],"date-time":"2024-07-16T11:10:52Z","timestamp":1721128252000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-024-02641-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,19]]},"references-count":54,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2024,7]]}},"alternative-id":["2641"],"URL":"https:\/\/doi.org\/10.1007\/s00034-024-02641-1","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"value":"0278-081X","type":"print"},{"value":"1531-5878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,3,19]]},"assertion":[{"value":"4 June 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 January 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 March 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}