{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,8,21]],"date-time":"2023-08-21T04:26:27Z","timestamp":1692591987764},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"22","license":[{"start":{"date-parts":[[2016,11,13]],"date-time":"2016-11-13T00:00:00Z","timestamp":1478995200000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2017,11]]},"DOI":"10.1007\/s11042-016-4107-6","type":"journal-article","created":{"date-parts":[[2016,11,13]],"date-time":"2016-11-13T21:51:26Z","timestamp":1479073886000},"page":"23225-23238","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Deep feature learning for cover song identification"],"prefix":"10.1007","volume":"76","author":[{"given":"Jiunn-Tsair","family":"Fang","sequence":"first","affiliation":[]},{"given":"Chi-Ting","family":"Day","sequence":"additional","affiliation":[]},{"given":"Pao-Chi","family":"Chang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,11,13]]},"reference":[{"issue":"13","key":"4107_CR1","first-page":"834","volume":"3","author":"AJ Al-Shareef","year":"2008","unstructured":"Al-Shareef AJ, Mohamed EA, Al-Judaibi E (2008) One hour ahead load forecasting using artificial neural network for the western area of Saudi Arabia. Int J Elec Compu Eng 3(13):834\u2013840","journal-title":"Int J Elec Compu Eng"},{"issue":"1","key":"4107_CR2","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2200000006","volume":"2","author":"Y Bengio","year":"2009","unstructured":"Bengio Y (2009) Learning deep architectures for AI. Found Trends Mach Learning 2(1):1\u2013127","journal-title":"Found Trends Mach Learning"},{"issue":"8","key":"4107_CR3","doi-asserted-by":"crossref","first-page":"1798","DOI":"10.1109\/TPAMI.2013.50","volume":"35","author":"Y Bengio","year":"2013","unstructured":"Bengio Y, Courville A, Vincent P (2013) Representation learning: a review and new perspectives. IEEE Trans Pattern Anal Mach Intell 35(8):1798\u20131828","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"4107_CR4","doi-asserted-by":"crossref","unstructured":"Bertin-Mahieux T, Ellis D (2012) Large-scale cover song recognition using the 2D-Fourier transform magnitude The 13th ISMIR Conference","DOI":"10.1109\/ASPAA.2011.6082307"},{"key":"4107_CR5","unstructured":"Bertin-Mahieux T, Ellis D, Whitman B, Lamere P. (2011) The million song dataset In Proceedings of ISMIR"},{"issue":"18","key":"4107_CR6","doi-asserted-by":"crossref","first-page":"7921","DOI":"10.1007\/s11042-014-2031-1","volume":"74","author":"TM Chang","year":"2014","unstructured":"Chang TM, Hsieh CB, Chang PC (2014) An enhanced direct chord transformation for music retrieval in the AAC domain with window switching. Multimed Tools and Appl 74(18):7921\u20137942","journal-title":"Multimed Tools and Appl"},{"key":"4107_CR7","unstructured":"Ellis DPW (2006) Beat tracking with dynamic programming MIREX 2006 Audio Beat Tracking Contest system description"},{"key":"4107_CR8","unstructured":"Ellis DPW (2007) The \u201ccovers80\u201d cover song data set. [Online]. Available: http:\/\/labrosa.ee.columbia.edu\/projects\/coversongs\/covers80\/"},{"key":"4107_CR9","unstructured":"Ellis D. Dynamic Time Warp (DTW) in Matlab. [Online]. Available: http:\/\/labrosa.ee.columbia.edu\/matlab\/dtw\/"},{"key":"4107_CR10","unstructured":"Ellis DPW, and Cotton C (2006) The 2007 LABROSA cover song detection system. Music Information Retrieval Evaluation eXchange (MIREX) extended abstract"},{"key":"4107_CR11","doi-asserted-by":"crossref","unstructured":"Ellis DPW, Poliner GE (2007) Identifying cover songs with chroma features and dynamic programming beat tracking. IEEE Int. Conf. Acoustic, Speech and Signal Processing (ICASSP), Honolulu, HI, 1429 \u20131432","DOI":"10.1109\/ICASSP.2007.367348"},{"key":"4107_CR12","unstructured":"Fujishima T (1999) Realtime chord recognition of musical sound: a system using common lisp music. Int. Comput. Music Conf., Beijing 464\u2013467"},{"issue":"7","key":"4107_CR13","doi-asserted-by":"crossref","first-page":"1527","DOI":"10.1162\/neco.2006.18.7.1527","volume":"18","author":"GE Hinton","year":"2006","unstructured":"Hinton GE, Osindero S, Teh YW (2006) A fast learning algorithm for deep belief nets. Neural Comput 18(7):1527\u20131554","journal-title":"Neural Comput"},{"issue":"5786","key":"4107_CR14","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1126\/science.1127647","volume":"313","author":"GE Hinton","year":"2006","unstructured":"Hinton GE, Salakhutdinov RS (2006) Reducing the dimensionality of data with neural networks. Science 313(5786):504\u2013507","journal-title":"Science"},{"key":"4107_CR15","unstructured":"Humphrey EJ, Nieto O, Bello JP (2013) Data driven and discriminative projections for large-scale cover song identification. The 14th ISMIR Conference: 149\u2013154"},{"issue":"3","key":"4107_CR16","doi-asserted-by":"crossref","first-page":"358","DOI":"10.1007\/s10115-004-0154-9","volume":"7","author":"E Keogh","year":"2005","unstructured":"Keogh E, Ratanamahatana CA (2005) Exact indexing of dynamic time warping. Knowl Inf Syst 7(3):358\u2013386","journal-title":"Knowl Inf Syst"},{"key":"4107_CR17","unstructured":"Lee K (2006) Identifying Cover Songs from Audio Using Harmonic Representation. Music Information Retrieval Evaluation eXchange (MIREX) extended abstract"},{"key":"4107_CR18","doi-asserted-by":"crossref","unstructured":"Nieto O, Bello JP (2014) Music segment similarity using 2D-Fourier magnitude coefficients. IEEE Int. Conf. on Acoustics, Speech and Signal Processing (ICASSP): 664\u2013668","DOI":"10.1109\/ICASSP.2014.6853679"},{"key":"4107_CR19","unstructured":"Palm RB (2012) Deep learning toolbox, [Online]. Available: http:\/\/www.mathworks.com\/matlabcentral\/fileexchange\/38310-deep-learning-toolbox"},{"key":"4107_CR20","unstructured":"Ranzato M, Boureau Y, LeCun Y (2007) Sparse feature learning for deep belief networks. Advances in Neural Information Processing Systems 20 (NIPS)"},{"key":"4107_CR21","doi-asserted-by":"crossref","unstructured":"Ranzato M, Poultney C, Chopra S, LeCun Y (2006) Efficient learning of sparse representations with an energy-based model NIPS","DOI":"10.7551\/mitpress\/7503.003.0147"},{"key":"4107_CR22","doi-asserted-by":"crossref","unstructured":"Ravuri S, Ellis DPW (2010) Cover song detection: From high scores to general classification. IEEE Int. Conf. on Acoustics, Speech and Signal Processing (ICASSP), Dallas, Texas, U.S.A. 65\u201368","DOI":"10.1109\/ICASSP.2010.5496214"},{"key":"4107_CR23","doi-asserted-by":"crossref","unstructured":"Rifai S, Vincent P, Muller X, Glorot X, Bengio Y (2011a) Contractive auto-encoders: Explicit invariance during feature extraction ICML","DOI":"10.1007\/978-3-642-23783-6_41"},{"key":"4107_CR24","unstructured":"Riley M, Heinen E, Ghosh J (2008) A text retrieval approach to content-based audio retrieval. Int. Conf. on Music Information Retrieval, Philadelphia, Pennsylvaia, U.S.A. 295\u2013300"},{"key":"4107_CR25","unstructured":"Sailer C, Dressler K (2006) Finding cover songs by melodic similarity. Music Information Retrieval Evaluation eXchange (MIREX) extended abstract"},{"key":"4107_CR26","volume-title":"Learning deep generative models doctoral dissertation","author":"R Salakhutdinov","year":"2009","unstructured":"Salakhutdinov R (2009) Learning deep generative models doctoral dissertation. University of Toronto, Toronto"},{"key":"4107_CR27","unstructured":"Salakhutdinov R Nonlinear dimensionality reduction using neural networks. Available: http:\/\/www.cs.toronto.edu\/~rsalakhu\/talks\/NLDR_NIPS06workshop.pdf"},{"key":"4107_CR28","doi-asserted-by":"crossref","unstructured":"Serr\u00e0 J, G\u00f3mez E (2008) Audio cover song identification based on tonal sequence alignment. IEEE Int. Conf. on Acoustics, Speech and Signal Processing (ICASSP), Las Vegas, Nevada, U.S.A. 61\u201364","DOI":"10.1109\/ICASSP.2008.4517546"},{"issue":"6","key":"4107_CR29","doi-asserted-by":"crossref","first-page":"1138","DOI":"10.1109\/TASL.2008.924595","volume":"16","author":"J Serr\u00e0","year":"2008","unstructured":"Serr\u00e0 J, G\u00f3mez E, Herrera P, Serra X (2008) Chroma binary similarity and local alignment applied to cover song identification. IEEE Trans Audio Speech Lang Process 16(6):1138\u20131151","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"14","key":"4107_CR30","doi-asserted-by":"crossref","first-page":"307","DOI":"10.1007\/978-3-642-11674-2_14","volume":"274","author":"J Serr\u00e0","year":"2010","unstructured":"Serr\u00e0 J, G\u00f3mez E, Herrera P (2010) Audio cover song identification and similarity: background, approaches, evaluation, and beyond. Adv Music Inf Retr 274(14):307\u2013332","journal-title":"Adv Music Inf Retr"},{"key":"4107_CR31","doi-asserted-by":"crossref","unstructured":"Shepard RN (1982) Structural representations of musical pitch. In Deutsch, D, editor, The Psychology of Music, First Edition. Swets & Zeitlinger","DOI":"10.1016\/B978-0-12-213562-0.50015-2"},{"key":"4107_CR32","unstructured":"Signal processing toolbox, time-dependent frequency analysis (specgram). [Online]. Available: http:\/\/faculty.petra.ac.id\/resmana\/private\/matlab-help\/toolbox\/signal\/specgram.html"},{"key":"4107_CR33","unstructured":"Smolensky P (1986) Information processing in dynamical systems: foundations of harmony theory. in Parallel Distributed Processing: Explorations in the Microstructure of Cognition, D. E. Rumelhart, J. L. McClelland, and C. PDP Research Group, Eds. Cambridge, MA, USA: MIT Press 194\u2013281"},{"key":"4107_CR34","unstructured":"Tralie CJ, Bendich P (2015) Cover song identification with timbral shape sequences. arXiv preprint arXiv:1507.05143"},{"key":"4107_CR35","doi-asserted-by":"crossref","unstructured":"Vincent P, Larochelle H, Bengio Y, Manzagol, PA. (2008) Extracting and composing robust features with denoising autoencoders ICML","DOI":"10.1145\/1390156.1390294"},{"key":"4107_CR36","unstructured":"Voorhees EM (1999) Proceedings of the 8th Text Retrieval Conference. TREC-8 question answering track report. 77\u201382"},{"key":"4107_CR37","unstructured":"Wang R, Han C, Wu Y, Guo T (2014) Fingerprint classification based on depth neural network. arXiv preprint arXiv:1409.5188"},{"key":"4107_CR38","volume-title":"Cover, grove music online","author":"R Witmer","year":"2006","unstructured":"Witmer R, Marks A (2006) In: Macy L (ed) Cover, grove music online. Oxford Univ. Press, Oxford"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11042-016-4107-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-016-4107-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-016-4107-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,20]],"date-time":"2023-08-20T22:09:22Z","timestamp":1692569362000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11042-016-4107-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,11,13]]},"references-count":38,"journal-issue":{"issue":"22","published-print":{"date-parts":[[2017,11]]}},"alternative-id":["4107"],"URL":"https:\/\/doi.org\/10.1007\/s11042-016-4107-6","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,11,13]]}}}