{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,6,22]],"date-time":"2024-06-22T16:40:55Z","timestamp":1719074455993},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2017,2,21]],"date-time":"2017-02-21T00:00:00Z","timestamp":1487635200000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multidim Syst Sign Process"],"published-print":{"date-parts":[[2018,7]]},"DOI":"10.1007\/s11045-017-0476-x","type":"journal-article","created":{"date-parts":[[2017,2,21]],"date-time":"2017-02-21T17:54:45Z","timestamp":1487699685000},"page":"887-902","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Deep learning of chroma representation for cover song identification in compression domain"],"prefix":"10.1007","volume":"29","author":[{"given":"Jiunn-Tsair","family":"Fang","sequence":"first","affiliation":[]},{"given":"Yu-Ruey","family":"Chang","sequence":"additional","affiliation":[]},{"given":"Pao-Chi","family":"Chang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,2,21]]},"reference":[{"issue":"13","key":"476_CR1","first-page":"834","volume":"3","author":"AJ Al-Shareef","year":"2008","unstructured":"Al-Shareef, A. J., Mohamed, E. A., & Al-Judaibi, E. (2008). One hour ahead load forecasting using artificial neural network for the western area of Saudi Arabia. International Journal of Electrical and Computer Engineering, 3(13), 834\u2013840.","journal-title":"International Journal of Electrical and Computer Engineering"},{"key":"476_CR2","doi-asserted-by":"crossref","unstructured":"Bengio, Y., Lamblin, P., Popovici, D., & Larochelle, H. (2007). Greedy layer-wise training of deep networks. In Proceedings of the Advances in Neural Information Processing Systems (pp. 153\u2013160).","DOI":"10.7551\/mitpress\/7503.003.0024"},{"issue":"1","key":"476_CR3","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2200000006","volume":"2","author":"Y Bengio","year":"2009","unstructured":"Bengio, Y. (2009). Learning deep architectures for AI. Foundations and Trends and in Machine Learning, 2(1), 1\u2013127.","journal-title":"Foundations and Trends and in Machine Learning"},{"issue":"8","key":"476_CR4","doi-asserted-by":"crossref","first-page":"1798","DOI":"10.1109\/TPAMI.2013.50","volume":"35","author":"Y Bengio","year":"2013","unstructured":"Bengio, Y., Courville, A., & Vincent, P. (2013). Representation learning: A review and new perspectives. IEEE Transactions on Pattern Analysis and Machine Intelligence, 35(8), 1798\u20131828.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"3","key":"476_CR5","doi-asserted-by":"crossref","first-page":"167","DOI":"10.1080\/00031305.1992.10475878","volume":"46","author":"G Casella","year":"1992","unstructured":"Casella, G., & George, E. I. (1992). Explaining the Gibbs sampler. The American Statistician, 46(3), 167\u2013174.","journal-title":"The American Statistician"},{"key":"476_CR6","doi-asserted-by":"crossref","unstructured":"Chang, T. M., Chen, E. T., Hsieh, C. B., & Chang, P. C. (2013). Cover song identification with direct chroma feature extraction from AAC files. In Proceedings of GCCE, Tokyo (pp. 55\u201356).","DOI":"10.1109\/GCCE.2013.6664919"},{"key":"476_CR7","first-page":"469","volume":"23","author":"GE Dahl","year":"2010","unstructured":"Dahl, G. E., et al. (2010). Phone recognitionwith the mean-covariance restricted Boltzmann machine. Advances in Neural Information Processing Systems, 23, 469\u2013477.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"476_CR8","unstructured":"Ellis, D. (2006). Beat tracking with dynamic programming. In MIREX 2006 audio beat tracking contest system description."},{"key":"476_CR9","doi-asserted-by":"crossref","unstructured":"Ellis, D. P. W., & Poliner, G. E. (2007). Identifying cover songs with chroma features and dynamic programming beat tracking. In Proceedings of the international conference on acoustics, speech and signal processing (ICASSP), Honolulu, HI (pp. 1429\u20131432).","DOI":"10.1109\/ICASSP.2007.367348"},{"key":"476_CR10","unstructured":"Fujishima, T. (1999). Realtime chord recognition of musical sound: A system using common lisp music. In Proceedings of international computer music conference, Beijing (pp. 464\u2013467)."},{"key":"476_CR11","unstructured":"Hinton, G. E., Srivastava, N., Krizhevsky, A., Sutskever, I., & Salakhutdinov, R. R. (2012). Improving neural networks by preventing co-adaptation of feature detectors. ArXiv e-prints 1207, 580."},{"issue":"6","key":"476_CR12","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","volume":"29","author":"GE Hinton","year":"2012","unstructured":"Hinton, G. E., et al. (2012). Deep neural networks for acoustic modeling in speech recognition. IEEE Signal Processing Magazine, 29(6), 82\u201397.","journal-title":"IEEE Signal Processing Magazine"},{"issue":"7","key":"476_CR13","doi-asserted-by":"crossref","first-page":"1527","DOI":"10.1162\/neco.2006.18.7.1527","volume":"18","author":"E Hinton","year":"2006","unstructured":"Hinton, E., Osindero, S., & Teh, Y. W. (2006). A fast learning algorithm for deep belief nets. Neural Computation, 18(7), 1527\u20131554.","journal-title":"Neural Computation"},{"issue":"5786","key":"476_CR14","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1126\/science.1127647","volume":"313","author":"GE Hinton","year":"2006","unstructured":"Hinton, G. E., & Salakhutdinov, R. S. (2006). Reducing the dimensionality of data with neural networks. Science, 313(5786), 504\u2013507.","journal-title":"Science"},{"key":"476_CR15","unstructured":"ISO\/IEC 13818-7. (1997). Information technology\u2014Generic coding of moving pictures and associated audio information\u2014Part 7: Advanced audio coding (AAC)."},{"issue":"3","key":"476_CR16","doi-asserted-by":"crossref","first-page":"1062","DOI":"10.1109\/TSA.2005.857573","volume":"14","author":"S Kiranyaz","year":"2006","unstructured":"Kiranyaz, S., Qureshi, A. F., & Gabbouj, M. (2006). A generic audio classification and segmentation approach for multimedia indexing and retrieval. IEEE Transactions on Audio, Speech, and Language Processing, 14(3), 1062\u20131081.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"476_CR17","unstructured":"Lee, K. (2006). Identifying cover songs from audio using harmonic representation. Music Information Retrieval Evaluation eXchange (MIREX) extended abstract."},{"key":"476_CR18","unstructured":"Matlab Central, Deep Learning Toolbox [Online]. http:\/\/www.mathworks.com\/matlabcentral\/fileexchange\/38310-deep-learning-toolbox ."},{"key":"476_CR19","doi-asserted-by":"crossref","unstructured":"Mnih, A., & Hinton, G. E. (2005). Learning nonlinear constraints with contrastive backpropagation. In 2005 IEEE international joint conference on neural networks, IJCNN\u201905. Proceedings (pp. 1302\u20131307).","DOI":"10.1109\/IJCNN.2005.1556042"},{"issue":"6","key":"476_CR20","doi-asserted-by":"crossref","first-page":"1088","DOI":"10.1109\/JSTSP.2011.2112333","volume":"5","author":"M Muller","year":"2011","unstructured":"Muller, M., Ellis, D. P. W., Klapuri, A., & Richard, G. (2011). Signal processing for music analysis. IEEE Journal of Selected Topics in Signal Processing, 5(6), 1088\u20131110.","journal-title":"IEEE Journal of Selected Topics in Signal Processing"},{"key":"476_CR21","unstructured":"Nair, V., & Hinton, G. E. (2009). 3D object recognition with deep belief nets. In Proceedings of the 22nd International Conference on Neural Information Processing Systems, NIPS \u201909 (pp. 1339\u20131347)."},{"key":"476_CR22","unstructured":"Ng, A. (2011). Sparse autoencoder. In CS294A lecture notes."},{"key":"476_CR23","doi-asserted-by":"crossref","unstructured":"Patel, N., & Sethi, I. (1996). Audio characterization for video indexing. In Proceedings of SPIE (pp. 373\u2013384).","DOI":"10.1117\/12.234776"},{"key":"476_CR24","unstructured":"Ranzato, M., Boureau, Y., & LeCun,Y. (2007). Sparse feature learning for deep belief networks. In Advances in neural information processing systems 20 (NIPS)."},{"issue":"3","key":"476_CR25","doi-asserted-by":"crossref","first-page":"434","DOI":"10.1109\/TASL.2009.2025099","volume":"18","author":"E Ravelli","year":"2010","unstructured":"Ravelli, E., Richard, G., & Daudet, L. (2010). Audio signal representations for indexing in the transform domain. IEEE Transactions on Audio, Speech, and Language Processing, 18(3), 434\u2013446.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"476_CR26","unstructured":"Riley, M., Heinen, E., & Ghosh, J. (2008). A text retrieval approach to content-based audio retrieval. In Proceedings of international conference on music information retrieval, Philadelphia, Pennsylvaia (pp. 295\u2013300)."},{"key":"476_CR27","unstructured":"Sailer, C., & Dressler, K. (2006). Finding cover songs by melodic similarity. Music Information Retrieval Evaluation eXchange (MIREX) extended abstract"},{"key":"476_CR28","unstructured":"Salakhutdinov, R. (2009). Learning deep generative models. Doctoral dissertation, University of Toronto."},{"key":"476_CR29","unstructured":"Salakhutdinov, R. Nonlinear dimensionality reduction using neural networks. http:\/\/www.cs.toronto.edu\/~rsalakhu\/talks\/NLDR_NIPS06workshop.pdf ."},{"key":"476_CR30","unstructured":"Serra, J., G\u2019omez, E., & Herrera, P. (2008). Transposing chroma representations to a common key. In Proceedings of IEEE CS conference on the use of symbols to represent music and multimedia objects, Citeseer (pp. 45\u201348)."},{"key":"476_CR31","volume-title":"The psychology of music","author":"RN Shepard","year":"1982","unstructured":"Shepard, R. N. (1982). Structural representations of musical pitch. In D. Deutsch (Ed.), The psychology of music (1st ed.). Amsterdam: Swets & Zeitlinger.","edition":"1"},{"key":"476_CR32","unstructured":"Smolensky, P. (1986). Information processing in dynamical systems: Foundations of harmony theory. In D. E. Rumelhart, J. L. McClelland & C. PDP Research Group (Eds.), Parallel distributed processing: Explorations in the microstructure of cognition (Vol. 1, pp. 194\u2013281). Cambridge, MA: MIT Press."},{"key":"476_CR33","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava, N., Hinton, G., Krizhevsky, A., Sutskeve, I., & Salakhutdinov, R. (2014). Dropout: A simple way to prevent neural networks from overfitting. Journal of Machine Learning Research, 15, 1929\u20131958.","journal-title":"Journal of Machine Learning Research"},{"key":"476_CR34","unstructured":"The Covers80 cover song data set [Online]. http:\/\/labrosa.ee.columbia.edu\/projects\/coversongs\/covers80\/ ."},{"key":"476_CR35","doi-asserted-by":"crossref","unstructured":"Tsai, T. H., & Chang, W. C. (2009). Two-stage method for specific audio retrieval based on MP3 compression domain. In Proceedings of IEEE international symposium on circuits and systems (pp. 713\u2013716).","DOI":"10.1109\/ISCAS.2009.5117848"},{"key":"476_CR36","unstructured":"Tsai, T. H., & Wang, Y. T. (2004). Content-based retrieval of audio example on MP3 compression domain. In Proceedings of IEEE 6th workshop on multimedia signal processing (pp. 123\u2013126)."},{"key":"476_CR37","doi-asserted-by":"crossref","unstructured":"Voorhees, E. M. (1999). The TREC-8 question answering track report. In Proceedings of the 8th text retrieval conference (TREC-8).","DOI":"10.6028\/NIST.SP.500-242"},{"issue":"3\u20134","key":"476_CR38","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1016\/0025-5564(78)90099-8","volume":"42","author":"MS Waterman","year":"1978","unstructured":"Waterman, M. S., & Smith, T. F. (1978). RNA secondary structure: A complete mathematical analysis. Mathematical Biosciences, 42(3\u20134), 257\u2013266.","journal-title":"Mathematical Biosciences"},{"key":"476_CR39","doi-asserted-by":"crossref","unstructured":"Yapp, L., & Zick, G. (1997). Speech recognition on MPEG\/audio encoded files. In Proceedings of IEEE international conference multimedia computing and systems (pp. 624\u2013625).","DOI":"10.1109\/MMCS.1997.609787"}],"container-title":["Multidimensional Systems and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11045-017-0476-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11045-017-0476-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11045-017-0476-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,22]],"date-time":"2024-06-22T16:10:42Z","timestamp":1719072642000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11045-017-0476-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,2,21]]},"references-count":39,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2018,7]]}},"alternative-id":["476"],"URL":"https:\/\/doi.org\/10.1007\/s11045-017-0476-x","relation":{},"ISSN":["0923-6082","1573-0824"],"issn-type":[{"value":"0923-6082","type":"print"},{"value":"1573-0824","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,2,21]]}}}