{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,14]],"date-time":"2024-09-14T19:50:20Z","timestamp":1726343420187},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2021,2,9]],"date-time":"2021-02-09T00:00:00Z","timestamp":1612828800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,2,9]],"date-time":"2021-02-09T00:00:00Z","timestamp":1612828800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"name":"the Scientific Research Project of Hebei Education Department of China","award":["QN2020198"]},{"name":"the Natural Science Foundation of the Colleges and Universities in Anhui Province of China","award":["KJ2020A0035"]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2022,2]]},"DOI":"10.1007\/s11042-020-10465-9","type":"journal-article","created":{"date-parts":[[2021,2,10]],"date-time":"2021-02-10T02:59:56Z","timestamp":1612925996000},"page":"4621-4647","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":20,"title":["An evaluation of deep neural network models for music classification using spectrograms"],"prefix":"10.1007","volume":"81","author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-2450-6052","authenticated-orcid":false,"given":"Jingxian","family":"Li","sequence":"first","affiliation":[]},{"given":"Lixin","family":"Han","sequence":"additional","affiliation":[]},{"given":"Xiaoshuang","family":"Li","sequence":"additional","affiliation":[]},{"given":"Jun","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Baohua","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"Zhinan","family":"Gou","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,2,9]]},"reference":[{"key":"10465_CR1","unstructured":"Aguiar RL, Costa YMG, Nanni L (2016) Music genre recognition using spectrograms with harmonic-percussive sound separation. In 35th International Conference of the Chilean Computer Science Society, Valparaiso, Chile, pp 1\u20137"},{"issue":"1","key":"10465_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1561\/2200000006","volume":"2","author":"Y Bengio","year":"2009","unstructured":"Bengio Y (2009) Learning deep architectures for AI. Foundations and trends in Machine Learning 2(1):1\u2013127","journal-title":"Foundations and trends in Machine Learning"},{"key":"10465_CR3","doi-asserted-by":"publisher","first-page":"1901","DOI":"10.1016\/j.procs.2020.03.209","volume":"167","author":"H Chaurasiya","year":"2020","unstructured":"Chaurasiya H (2020) Time-Frequency Representations: Spectrogram, Cochleogram and Correlogram. Procedia Computer Science 167:1901\u20131910","journal-title":"Procedia Computer Science"},{"key":"10465_CR4","unstructured":"Choi K, Fazekas G, Sandler M (2016) Automatic tagging using deep convolutional neural networks. arXiv preprint arXiv:1606.00298."},{"key":"10465_CR5","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1016\/j.asoc.2016.12.024","volume":"52","author":"YMG Costa","year":"2017","unstructured":"Costa YMG, Oliveira LS, Silla JCN, Silla CN Jr (2017) An evaluation of convolutional neural networks for music classification using spectrograms. Applied soft computing 52:28\u201338","journal-title":"Applied soft computing"},{"key":"10465_CR6","unstructured":"Defferrard M, Benzi K, Vandergheynst P et al (2016) Fma: A dataset for music analysis. arXiv preprint arXiv:1612.01840."},{"issue":"3\u20134","key":"10465_CR7","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1561\/2000000039","volume":"7","author":"L Deng","year":"2014","unstructured":"Deng L, Yu D (2014) Deep learning: methods and applications. Foundations and Trends in Signal Processing 7(3\u20134):197\u2013387","journal-title":"Foundations and Trends in Signal Processing"},{"key":"10465_CR8","unstructured":"Ferraro A, Bogdanov D, Jeon JH et al (2019) Music Auto-tagging Using CNNs and Mel-spectrograms with Reduced Frequency and Time Resolution. arXiv preprint arXiv:1911.04824."},{"key":"10465_CR9","unstructured":"Glauner PO (2015) Deep Convolutional Neural Networks for Smile Recognition (MSc Thesis). Imperial College London, Department of Computing. arXiv:1508.06535."},{"key":"10465_CR10","unstructured":"Gulli A, Pal S (2017) Deep learning with Keras. Packt Publishing Ltd."},{"key":"10465_CR11","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S et al. (2016) Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"10465_CR12","unstructured":"Howard A G, Zhu M, Chen B et al (2017) Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861."},{"key":"10465_CR13","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Van Der Maaten L et al (2017) Densely connected convolutional networks. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp: 4700\u20134708.","DOI":"10.1109\/CVPR.2017.243"},{"key":"10465_CR14","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1016\/j.ins.2013.04.014","volume":"243","author":"P Khunarsal","year":"2013","unstructured":"Khunarsal P, Lursinsap C, Raicharoen T (2013) Very short time environmental sound classification based on spectrogram pattern matching. Information Sciences 243:57\u201374","journal-title":"Information Sciences"},{"key":"10465_CR15","doi-asserted-by":"crossref","unstructured":"Kim T, Lee J, Nam J (2018) Sample-level CNN architectures for music auto-tagging using raw waveforms. In 2018 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp: 366\u2013370.","DOI":"10.1109\/ICASSP.2018.8462046"},{"key":"10465_CR16","unstructured":"Kingma DP, Ba J (2014) Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980."},{"key":"10465_CR17","doi-asserted-by":"crossref","unstructured":"Kobayashi T, Kubota A, Suzuki Y (2018) Audio feature extraction based on sub-band signal correlations for music genre classification. In 2018 IEEE International Symposium on Multimedia. ISM, pp 180\u2013181.","DOI":"10.1109\/ISM.2018.00-15"},{"key":"10465_CR18","unstructured":"Kong Q, Feng X, Li Y (2014) Music genre classification using convolutional neural network. In Proc. Int. Soc. Music Inform. Retrieval (ISMIR)."},{"issue":"11","key":"10465_CR19","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun Y, Bottou L, Bengio Y et al (1998) Gradient-based learning applied to document recognition. Proceedings of the IEEE 86(11):2278\u20132324","journal-title":"Proceedings of the IEEE"},{"issue":"7553","key":"10465_CR20","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun Y, Bengio Y, Hinton G (2015) Hinton. Deep learning. Nature 521(7553):436\u2013444","journal-title":"Nature"},{"key":"10465_CR21","unstructured":"Lidy T, Schindler A (2016) Parallel convolutional neural networks for music genre and mood classification. MIREX2016."},{"key":"10465_CR22","unstructured":"Liu X, Chen Q, Wu X et al (2017) CNN based music emotion classification. arXiv preprint arXiv:1704.05665."},{"key":"10465_CR23","doi-asserted-by":"crossref","unstructured":"Ma X, Wu Z, Jia J et al (2018) Emotion Recognition from Variable-Length Speech Segments Using Deep Learning on Spectrograms. In Interspeech, pp 3683\u20133687","DOI":"10.21437\/Interspeech.2018-2228"},{"key":"10465_CR24","unstructured":"McKinney M, Breebaart J (2003) Features for audio and music classification. In Proc. ISMIR, pp 151\u2013158."},{"issue":"1","key":"10465_CR25","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1109\/MSP.2018.2874383","volume":"36","author":"J Nam","year":"2018","unstructured":"Nam J, Choi K, Lee J et al (2018) Deep learning for audio-based music classification and tagging: Teaching computers to distinguish rock from bach. IEEE Signal Processing Magazine 36(1):41\u201351","journal-title":"IEEE Signal Processing Magazine"},{"key":"10465_CR26","unstructured":"Panagakis Y, Kotropoulos C, Arce GR (2009) Music genre classification via sparse representations of auditory temporal modulations, In 2009 17th European Signal Processing Conference, IEEE, pp 1\u20135."},{"key":"10465_CR27","doi-asserted-by":"publisher","first-page":"334","DOI":"10.1016\/j.eswa.2018.05.016","volume":"114","author":"M Papakostas","year":"2018","unstructured":"Papakostas M, Giannakopoulos T (2018) Speech-music discrimination using deep visual feature extractors. Expert Systems with Applications 114:334\u2013344","journal-title":"Expert Systems with Applications"},{"key":"10465_CR28","doi-asserted-by":"crossref","unstructured":"Pons J, Serra X (2019) Randomly weighted CNNs for (music) audio classification. In ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 336\u2013340","DOI":"10.1109\/ICASSP.2019.8682912"},{"key":"10465_CR29","doi-asserted-by":"crossref","unstructured":"Sainath TN, Mohamed A, Kingsbury B et al (2013) Deep convolutional neural networks for LVCSR. In 2013 IEEE international conference on acoustics, speech and signal processing. IEEE, pp 8614\u20138618.","DOI":"10.1109\/ICASSP.2013.6639347"},{"key":"10465_CR30","doi-asserted-by":"crossref","unstructured":"Sandler M, Howard A, Zhu M et al (2018) Mobilenetv2: Inverted residuals and linear bottlenecks. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4510\u20134520.","DOI":"10.1109\/CVPR.2018.00474"},{"key":"10465_CR31","doi-asserted-by":"crossref","unstructured":"Satt A, Rozenberg S, Hoory R (2017) Efficient Emotion Recognition from Speech Using Deep Learning on Spectrograms. In INTERSPEECH, pp 1089\u20131093","DOI":"10.21437\/Interspeech.2017-200"},{"key":"10465_CR32","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556."},{"key":"10465_CR33","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1016\/j.neucom.2018.02.076","volume":"292","author":"G Song","year":"2018","unstructured":"Song G, Wang Z, Han F et al (2018) Music auto-tagging using deep Recurrent Neural Networks. Neurocomputing 292:104\u2013110","journal-title":"Neurocomputing"},{"issue":"5","key":"10465_CR34","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1109\/TSA.2002.800560","volume":"10","author":"G Tzanetakis","year":"2002","unstructured":"Tzanetakis G, Cook P (2002) Musical genre classification of audio signals. IEEE Trans. Speech Audio Process 10(5):293\u2013302","journal-title":"IEEE Trans. Speech Audio Process"},{"key":"10465_CR35","unstructured":"Valerio V D, Pereira R M, Costa YMG et al (2018) A Resampling Approach for Imbalanceness on Music Genre Classification Using Spectrograms. In The Thirty-First International Flairs Conference."},{"key":"10465_CR36","doi-asserted-by":"crossref","unstructured":"Zhang W, Lei W, Xu X et al (2016) Improved Music Genre Classification with Convolutional Neural Networks. In INTERSPEECH, pp 3304\u20133308.","DOI":"10.21437\/Interspeech.2016-1236"},{"issue":"1","key":"10465_CR37","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1093\/nsr\/nwy108","volume":"6","author":"ZH Zhou","year":"2019","unstructured":"Zhou ZH, Feng J (2019) Deep forest. National Science Review 6(1):74\u201386","journal-title":"National Science Review"},{"key":"10465_CR38","unstructured":"Zoph B, Le Q V (2016) Neural architecture search with reinforcement learning. arXiv preprint arXiv:1611.01578."},{"key":"10465_CR39","doi-asserted-by":"crossref","unstructured":"Zoph B, Vasudevan V, Shlens J et al (2018) Learning transferable architectures for scalable image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp 8697\u20138710.","DOI":"10.1109\/CVPR.2018.00907"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-10465-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-020-10465-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-10465-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,2,22]],"date-time":"2022-02-22T06:11:14Z","timestamp":1645510274000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-020-10465-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,2,9]]},"references-count":39,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2022,2]]}},"alternative-id":["10465"],"URL":"https:\/\/doi.org\/10.1007\/s11042-020-10465-9","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,2,9]]},"assertion":[{"value":"2 July 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 October 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 December 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 February 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}