{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:19:30Z","timestamp":1740122370293,"version":"3.37.3"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2021,2,11]],"date-time":"2021-02-11T00:00:00Z","timestamp":1613001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,2,11]],"date-time":"2021-02-11T00:00:00Z","timestamp":1613001600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100000879","name":"Alfred P. Sloan Foundation","doi-asserted-by":"crossref","id":[{"id":"10.13039\/100000879","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["DP2GM137413"],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Data Min Knowl Disc"],"published-print":{"date-parts":[[2021,5]]},"DOI":"10.1007\/s10618-020-00732-6","type":"journal-article","created":{"date-parts":[[2021,2,11]],"date-time":"2021-02-11T15:48:58Z","timestamp":1613058538000},"page":"748-795","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["ForestDSH: a universal hash design for discrete probability distributions"],"prefix":"10.1007","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2015-6064","authenticated-orcid":false,"given":"Arash Gholami","family":"Davoodi","sequence":"first","affiliation":[]},{"given":"Sean","family":"Chang","sequence":"additional","affiliation":[]},{"given":"Hyun Gon","family":"Yoo","sequence":"additional","affiliation":[]},{"given":"Anubhav","family":"Baweja","sequence":"additional","affiliation":[]},{"given":"Mihir","family":"Mongia","sequence":"additional","affiliation":[]},{"given":"Hosein","family":"Mohimani","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,2,11]]},"reference":[{"issue":"6928","key":"732_CR1","doi-asserted-by":"publisher","first-page":"198","DOI":"10.1038\/nature01511","volume":"422","author":"R Aebersold","year":"2003","unstructured":"Aebersold R, Mann M (2003) Mass spectrometry-based proteomics. Nature 422(6928):198\u2013207","journal-title":"Nature"},{"key":"732_CR2","unstructured":"Anagnostopoulos E, Emiris IZ, Psarros I (2015) Low-quality dimension reduction and high-dimensional approximate nearest neighbor. In: 31st international symposium on computational geometry (SoCG 2015), Schloss Dagstuhl-Leibniz-Zentrum fuer Informatik"},{"key":"732_CR3","unstructured":"Andoni A, Indyk P, Laarhoven T, Razenshteyn I, Schmidt L (2015) Practical and optimal LSH for angular distance. In: Advances in neural information processing systems, pp 1225\u20131233"},{"key":"732_CR4","doi-asserted-by":"crossref","unstructured":"Andoni A, Laarhoven T, Razenshteyn I, Waingarten E (2017) Optimal hashing-based time-space trade-offs for approximate near neighbors. In: Proceedings of the twenty-eighth annual ACM-SIAM symposium on discrete algorithms. SIAM, pp 47\u201366","DOI":"10.1137\/1.9781611974782.4"},{"key":"732_CR5","doi-asserted-by":"crossref","unstructured":"Andoni A, Naor A, Nikolov A, Razenshteyn I, Waingarten E (2018) Data-dependent hashing via nonlinear spectral gaps. In: Proceedings of the 50th annual ACM SIGACT symposium on theory of computing, pp 787\u2013800","DOI":"10.1145\/3188745.3188846"},{"key":"732_CR6","doi-asserted-by":"crossref","unstructured":"Andoni A, Razenshteyn I (2015) Optimal data-dependent hashing for approximate near neighbors. In: Proceedings of the forty-seventh annual ACM symposium on theory of computing, pp 793\u2013801","DOI":"10.1145\/2746539.2746553"},{"key":"732_CR7","doi-asserted-by":"crossref","unstructured":"Bawa M, Condie T, Ganesan P (2005) LSH forest: self-tuning indexes for similarity search. In: Proceedings of the 14th international conference on world wide web, pp 651\u2013660","DOI":"10.1145\/1060745.1060840"},{"issue":"9","key":"732_CR8","doi-asserted-by":"publisher","first-page":"509","DOI":"10.1145\/361002.361007","volume":"18","author":"JL Bentley","year":"1975","unstructured":"Bentley JL (1975) Multidimensional binary search trees used for associative searching. Commun ACM 18(9):509\u2013517","journal-title":"Commun ACM"},{"key":"732_CR9","doi-asserted-by":"crossref","unstructured":"Beyer K, Goldstein J, Ramakrishnan R, Shaft U (1999) When is \u201cnearest neighbor\u201d meaningful? In: International conference on database theory. Springer, pp 217\u2013235","DOI":"10.1007\/3-540-49257-7_15"},{"key":"732_CR10","unstructured":"Bhatia K, Jain H, Kar P, Varma M, Jain P (2015) Sparse local embeddings for extreme multi-label classification. In: Advances in neural information processing systems, pp 730\u2013738"},{"key":"732_CR11","unstructured":"Castelli V, Li CS, Thomasian A (2000) Searching multidimensional indexes using associated clustering and dimension reduction information. U.S. Patent No. 6,134,541"},{"issue":"5","key":"732_CR12","first-page":"1919","volume":"39","author":"A Chakrabarti","year":"2010","unstructured":"Chakrabarti A, Regev O (2010) An optimal randomized cell probe lower bound for approximate nearest neighbor searching. Soc Ind Appl Math SIAM J Comput 39(5):1919\u20131940","journal-title":"Soc Ind Appl Math SIAM J Comput"},{"key":"732_CR13","doi-asserted-by":"crossref","unstructured":"Charikar MS (2002) Similarity estimation techniques from rounding algorithms. In: Proceedings of the thirty-fourth annual ACM symposium on theory of computing, pp 380\u2013388","DOI":"10.1145\/509907.509965"},{"key":"732_CR14","unstructured":"Choromanska AE, Langford J (2015) Logarithmic time online multiclass prediction. In: Advances in neural information processing systems, pp 55\u201363"},{"key":"732_CR15","doi-asserted-by":"crossref","unstructured":"Christiani T, Pagh R (2017) Set similarity search beyond MinHash. In: Proceedings of the 49th annual ACM SIGACT symposium on theory of computing, pp 1094\u20131107","DOI":"10.1145\/3055399.3055443"},{"key":"732_CR16","unstructured":"Dasarathy BV, Sheela BV (1977) Visiting nearest neighbors\u2014a survery of nearest neighbor pattern classification techniques. In: Proceedings of the international conference on cybernetics and society, pp 630\u2013636"},{"issue":"8","key":"732_CR17","doi-asserted-by":"publisher","first-page":"4166","DOI":"10.1109\/TIT.2010.2050814","volume":"56","author":"M Dubiner","year":"2010","unstructured":"Dubiner M (2010) Bucketing coding and information theory for the statistical high-dimensional nearest-neighbor problem. IEEE Trans Inf Theory 56(8):4166\u20134179","journal-title":"IEEE Trans Inf Theory"},{"issue":"10","key":"732_CR18","doi-asserted-by":"publisher","first-page":"6646","DOI":"10.1109\/TIT.2012.2204169","volume":"58","author":"M Dubiner","year":"2012","unstructured":"Dubiner M (2012) A heterogeneous high-dimensional approximate nearest neighbor algorithm. IEEE Trans Inf Theory 58(10):6646\u20136658","journal-title":"IEEE Trans Inf Theory"},{"key":"732_CR19","volume-title":"Pattern classification and scene analysis","author":"RO Duda","year":"1973","unstructured":"Duda RO, Hart PE, Stork DG (1973) Pattern classification and scene analysis, vol 3. Wiley, New York"},{"issue":"7","key":"732_CR20","doi-asserted-by":"publisher","first-page":"587","DOI":"10.1038\/nmeth.1609","volume":"8","author":"AM Frank","year":"2011","unstructured":"Frank AM, Monroe ME, Shah AR, Carver JJ, Bandeira N, Moore RJ, Anderson GA, Smith RD, Pevzner PA (2011) Spectral archives: extending spectral libraries to analyze both identified and unidentified spectra. Nat Methods 8(7):587\u2013591","journal-title":"Nat Methods"},{"issue":"3","key":"732_CR21","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1145\/355744.355745","volume":"3","author":"JH Friedman","year":"1977","unstructured":"Friedman JH, Bentley JL, Finkel RA (1977) An algorithm for finding best matches in logarithmic expected time. ACM Trans Math Softw TOMS 3(3):209\u2013226","journal-title":"ACM Trans Math Softw TOMS"},{"key":"732_CR22","unstructured":"Gionis A, Indyk P, Motwani R (1999) Similarity search in high dimensions via hashing. In: International conference on very large data bases, VLDB, vol 99, pp 518\u2013529"},{"key":"732_CR23","doi-asserted-by":"crossref","unstructured":"Guttman A (1984) R-trees: a dynamic index structure for spatial searching. In: Proceedings of the 1984 ACM SIGMOD international conference on management of data, pp 47\u201357","DOI":"10.1145\/971697.602266"},{"key":"732_CR24","doi-asserted-by":"crossref","unstructured":"Indyk P, Motwani R (1998) Approximate nearest neighbors: towards removing the curse of dimensionality. In: Proceedings of the thirtieth annual ACM symposium on theory of computing, pp 604\u2013613","DOI":"10.1145\/276698.276876"},{"key":"732_CR25","doi-asserted-by":"crossref","unstructured":"Jain H, Prabhu Y, Varma M (2016) Extreme multi-label loss functions for recommendation, tagging, ranking & other missing label applications. In: Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining, pp 935\u2013944","DOI":"10.1145\/2939672.2939756"},{"key":"732_CR26","doi-asserted-by":"publisher","first-page":"5277","DOI":"10.1038\/ncomms6277","volume":"5","author":"S Kim","year":"2014","unstructured":"Kim S, Pevzner PA (2014) MS-GF+ makes progress towards a universal database search tool for proteomics. Nat Commun 5:5277","journal-title":"Nat Commun"},{"key":"732_CR27","unstructured":"Krizhevsky A (2009) Learning multiple layers of features from tiny images. Master\u2019s thesis, University of Toronto"},{"issue":"1","key":"732_CR28","first-page":"2814","volume":"18","author":"W Liu","year":"2017","unstructured":"Liu W, Tsang IW (2017) Making decision trees feasible in ultrahigh feature and label dimensions. J Mach Learn Res 18(1):2814\u20132849","journal-title":"J Mach Learn Res"},{"issue":"3","key":"732_CR29","doi-asserted-by":"publisher","first-page":"e00031-18","DOI":"10.1128\/mSystems.00031-18","volume":"3","author":"D McDonald","year":"2018","unstructured":"McDonald D, Hyde E, Debelius JW, Morton JT, Gonzalez A, Ackermann G, Aksenov AA, Behsaz B, Brennan C, Chen Y, Goldasich LD (2018) American Gut: an open platform for citizen science microbiome research. Msystems 3(3):e00031-18","journal-title":"Msystems"},{"key":"732_CR30","unstructured":"Miltersen PB (1999) Cell probe complexity-a survey. In: Proceedings of the 19th conference on the foundations of software technology and theoretical computer science, advances in data structures workshop, p 2"},{"key":"732_CR31","volume-title":"A non-linear dimensionality reduction method for improving nearest neighbour classification","author":"R Min","year":"2005","unstructured":"Min R (2005) A non-linear dimensionality reduction method for improving nearest neighbour classification. University of Toronto, Toronto"},{"key":"732_CR32","doi-asserted-by":"crossref","unstructured":"Mori G, Belongie S, Malik J (2001) Shape contexts enable efficient retrieval of similar shapes. In: Proceedings of the 2001 IEEE computer society conference on computer vision and pattern recognition. CVPR 2001, IEEE, vol 1, pp I","DOI":"10.1109\/CVPR.2001.990547"},{"key":"732_CR33","unstructured":"Nam J, Menc\u00eda EL, Kim HJ, F\u00fcrnkranz J (2017) Maximizing subset accuracy with recurrent neural networks in multi-label classification. In: Advances in neural information processing systems, pp 5413\u20135423"},{"key":"732_CR34","unstructured":"Niculescu-Mizil A, Abbasnejad E (2017) Label filters for large scale multilabel classification. In: Artificial intelligence and statistics, pp 1448\u20131457"},{"key":"732_CR35","doi-asserted-by":"crossref","unstructured":"Prabhu Y, Varma M (2014) Fastxml: a fast, accurate and stable tree-classifier for extreme multi-label learning. In: Proceedings of the 20th ACM SIGKDD international conference on knowledge discovery and data mining, pp 263\u2013272","DOI":"10.1145\/2623330.2623651"},{"key":"732_CR36","unstructured":"Rai P, Hu C, Henao R, Carin L (2015) Large-scale Bayesian multi-label learning via topic-based label embeddings. In: Advances in neural information processing systems, pp 3222\u20133230"},{"key":"732_CR37","doi-asserted-by":"crossref","unstructured":"Rubinstein A (2018) Hardness of approximate nearest neighbor search. In: Proceedings of the 50th annual ACM SIGACT symposium on theory of computing, pp 1260\u20131268","DOI":"10.1145\/3188745.3188916"},{"key":"732_CR38","doi-asserted-by":"crossref","unstructured":"Shakhnarovich G, Viola P, Darrell T (2003) Fast pose estimation with parameter-sensitive hashing. In: Proceedings of the ninth IEEE international conference on computer vision. IEEE, vol 2, p 750","DOI":"10.1109\/ICCV.2003.1238424"},{"key":"732_CR39","doi-asserted-by":"crossref","unstructured":"Shaw B, Jebara T (2009) Structure preserving embedding. In: Proceedings of the 26th annual international conference on machine learning, pp 937\u2013944","DOI":"10.1145\/1553374.1553494"},{"key":"732_CR40","unstructured":"Shrivastava A, Li P (2014) Asymmetric LSH (ALSH) for sublinear time maximum inner product search (MIPS). In: Advances in neural information processing systems, pp 2321\u20132329"},{"key":"732_CR41","doi-asserted-by":"crossref","unstructured":"Tagami Y (2017) Annexml: approximate nearest neighbor search for extreme multi-label classification. In: Proceedings of the 23rd ACM SIGKDD international conference on knowledge discovery and data mining, pp 455\u2013464","DOI":"10.1145\/3097983.3097987"},{"key":"732_CR42","doi-asserted-by":"crossref","unstructured":"Yen IEH, Huang X, Ravikumar P, Zhong K, Dhillon I (2016) Pd-sparse: a primal and dual sparse approach to extreme multiclass and multilabel classification. In: International conference on machine learning, pp 3069\u20133077","DOI":"10.1145\/3097983.3098083"},{"key":"732_CR43","unstructured":"Yianilos PN (1993) Data structures and algorithms for nearest neighbor search in general metric spaces. In: Symposium on discrete algorithms, SODA, vol 93, pp 311\u2013321"},{"key":"732_CR44","doi-asserted-by":"crossref","unstructured":"Zhou WJ, Yu Y, Zhang ML (2017) Binary linear compression for multi-label classification. In: Proceedings of the twenty-sixth international joint conference on artificial intelligence, IJCAI, pp 3546\u20133552","DOI":"10.24963\/ijcai.2017\/496"}],"container-title":["Data Mining and Knowledge Discovery"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-020-00732-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10618-020-00732-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-020-00732-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,24]],"date-time":"2024-08-24T01:48:15Z","timestamp":1724464095000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10618-020-00732-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,2,11]]},"references-count":44,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2021,5]]}},"alternative-id":["732"],"URL":"https:\/\/doi.org\/10.1007\/s10618-020-00732-6","relation":{},"ISSN":["1384-5810","1573-756X"],"issn-type":[{"type":"print","value":"1384-5810"},{"type":"electronic","value":"1573-756X"}],"subject":[],"published":{"date-parts":[[2021,2,11]]},"assertion":[{"value":"8 November 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 December 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 February 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}