{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,8,30]],"date-time":"2024-08-30T11:36:55Z","timestamp":1725017815095},"reference-count":49,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2020,10,1]],"date-time":"2020-10-01T00:00:00Z","timestamp":1601510400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Information Sciences"],"published-print":{"date-parts":[[2020,10]]},"DOI":"10.1016\/j.ins.2020.06.032","type":"journal-article","created":{"date-parts":[[2020,6,18]],"date-time":"2020-06-18T06:32:18Z","timestamp":1592461938000},"page":"215-228","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":15,"special_numbering":"C","title":["Similarity and diversity induced paired projection for cross-modal retrieval"],"prefix":"10.1016","volume":"539","author":[{"given":"Jinxing","family":"Li","sequence":"first","affiliation":[]},{"given":"Mu","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-9457-7956","authenticated-orcid":false,"given":"Guangming","family":"Lu","sequence":"additional","affiliation":[]},{"given":"Bob","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-1404-1378","authenticated-orcid":false,"given":"Hongpeng","family":"Yin","sequence":"additional","affiliation":[]},{"given":"David","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.ins.2020.06.032_b0005","unstructured":"Galen Andrew, Raman Arora, Jeff Bilmes, Karen Livescu, Deep canonical correlation analysis, in: International Conference on Machine Learning, 2013, pp. 1247\u20131255."},{"key":"10.1016\/j.ins.2020.06.032_b0010","unstructured":"David M. Blei, Andrew Y. Ng, Michael I. Jordan, Latent dirichlet allocation, J. Mach. Learn. Res. 3 (Jan) (2003) 993\u20131022."},{"key":"10.1016\/j.ins.2020.06.032_b0015","unstructured":"Xiaochun Cao, Changqing Zhang, Huazhu Fu, Si Liu, Hua Zhang, Diversity-induced multi-view subspace clustering, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2015, pp. 586\u2013594."},{"key":"10.1016\/j.ins.2020.06.032_b0020","doi-asserted-by":"crossref","unstructured":"Kamalika Chaudhuri, Sham M. Kakade, Karen Livescu, Karthik Sridharan, Multi-view clustering via canonical correlation analysis, in: Proceedings of the 26th Annual International Conference on Machine Learning, 2009, ACM, pp. 129\u2013136.","DOI":"10.1145\/1553374.1553391"},{"issue":"2","key":"10.1016\/j.ins.2020.06.032_b0025","doi-asserted-by":"crossref","first-page":"208","DOI":"10.1109\/TMM.2015.2508146","article-title":"Discriminative dictionary learning with common label alignment for cross-modal retrieval","volume":"18","author":"Deng","year":"2015","journal-title":"IEEE Trans. Multimedia"},{"key":"10.1016\/j.ins.2020.06.032_b0030","unstructured":"Carl Henrik Ek, PHTND Lawrence, Shared Gaussian process latent variable models. PhD thesis, 2009."},{"issue":"1","key":"10.1016\/j.ins.2020.06.032_b0035","doi-asserted-by":"crossref","first-page":"189","DOI":"10.1109\/TIP.2014.2375634","article-title":"Discriminative shared gaussian processes for multiview and view-invariant facial expression recognition","volume":"24","author":"Eleftheriadis","year":"2015","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.ins.2020.06.032_b0040","doi-asserted-by":"crossref","unstructured":"Ali Farhadi, Mohsen Hejrati, Mohammad Amin Sadeghi, Peter Young, Cyrus Rashtchian, Julia Hockenmaier, David Forsyth, Every picture tells a story: generating sentences from images, in: European Conference on Computer Vision, Springer, 2010, pp. 15\u201329.","DOI":"10.1007\/978-3-642-15561-1_2"},{"key":"10.1016\/j.ins.2020.06.032_b0045","unstructured":"Lianli Gao, Xiangpeng Li, Jingkuan Song, Heng Tao Shen, Hierarchical lstms with adaptive attention for visual captioning, IEEE Trans. Pattern Anal. Mach. Intell., 2019."},{"key":"10.1016\/j.ins.2020.06.032_b0050","doi-asserted-by":"crossref","unstructured":"Arthur Gretton, Olivier Bousquet, Alex Smola, Bernhard Scholkopf, Measuring statistical dependence with hilbert-schmidt norms, in: ALT, vol. 16, Springer, 2005, pp. 63\u201378.","DOI":"10.1007\/11564089_7"},{"key":"10.1016\/j.ins.2020.06.032_b0055","unstructured":"Shuhang Gu, Lei Zhang, Wangmeng Zuo, Xiangchu Feng, Projective dictionary pair learning for pattern classification, in: Advances in Neural Information Processing Systems, 2014, pp. 793\u2013801."},{"issue":"1","key":"10.1016\/j.ins.2020.06.032_b0060","doi-asserted-by":"crossref","first-page":"188","DOI":"10.1109\/TPAMI.2015.2435740","article-title":"Multi-view discriminant analysis","volume":"38","author":"Kan","year":"2016","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.ins.2020.06.032_b0065","unstructured":"Inayatullah Khan, Amir Saffari, Horst Bischof, Tvgraz: multi-modal learning of object categories by combining textual and visual features, in: AAPR Workshop, 2009, pp. 213\u2013224."},{"key":"10.1016\/j.ins.2020.06.032_b0070","unstructured":"Shaishav Kumar, Raghavendra Udupa, Learning hash functions for cross-view similarity search, in: IJCAI Proceedings-International Joint Conference on Artificial Intelligence, vol. 22, 2011, p. 1360."},{"key":"10.1016\/j.ins.2020.06.032_b0075","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"4242","article-title":"Self-supervised adversarial hashing networks for cross-modal retrieval","author":"Li","year":"2018"},{"key":"10.1016\/j.ins.2020.06.032_b0080","article-title":"Shared linear encoder-based multikernel gaussian process latent variable model for visual classification","author":"Li","year":"2019","journal-title":"IEEE Trans. Cybern."},{"key":"10.1016\/j.ins.2020.06.032_b0085","series-title":"Thirty-Second AAAI Conference on Artificial Intelligence","article-title":"A probabilistic hierarchical model for multi-view and multi-feature classification","author":"Li","year":"2018"},{"issue":"8","key":"10.1016\/j.ins.2020.06.032_b0090","doi-asserted-by":"crossref","first-page":"2886","DOI":"10.1109\/TCYB.2018.2831457","article-title":"Visual classification with multikernel shared gaussian process latent variable model","volume":"49","author":"Li","year":"2018","journal-title":"IEEE Trans. Cybern."},{"key":"10.1016\/j.ins.2020.06.032_b0095","unstructured":"Jinxing Li, Bob Zhang, Guangming Lu, Jane You, Yong Xu, Feng Wu, David Zhang, Relaxed asymmetric deep hashing learning: point-to-angle matching, IEEE Trans. Neural Networks Learn. Syst. 2019."},{"key":"10.1016\/j.ins.2020.06.032_b0100","unstructured":"Jinxing Li, Bob Zhang, Guangming Lu, David Zhang, Shared linear encoder-based gaussian process latent variable model for visual classification, in: Proceedings of the 26th ACM International Conference on Multimedia, 2018, pp. 26\u201334."},{"key":"10.1016\/j.ins.2020.06.032_b0105","doi-asserted-by":"crossref","first-page":"113372","DOI":"10.1109\/ACCESS.2019.2927524","article-title":"Dual asymmetric deep hashing learning","volume":"7","author":"Li","year":"2019","journal-title":"IEEE Access"},{"key":"10.1016\/j.ins.2020.06.032_b0110","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1016\/j.inffus.2018.02.005","article-title":"Generative multi-view and multi-feature learning for classification","volume":"45","author":"Li","year":"2019","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.ins.2020.06.032_b0115","article-title":"Shared autoencoder gaussian process latent variable model for visual classification","author":"Li","year":"2017","journal-title":"IEEE Trans. Neural Networks Learn. Syst."},{"key":"10.1016\/j.ins.2020.06.032_b0120","doi-asserted-by":"crossref","first-page":"191","DOI":"10.1016\/j.ins.2016.09.031","article-title":"Joint similar and specific learning for diabetes mellitus and impaired glucose regulation detection","volume":"384","author":"Li","year":"2017","journal-title":"Inf. Sci."},{"key":"10.1016\/j.ins.2020.06.032_b0125","unstructured":"Aditya Krishna Menon, Didi Surian, Sanjay Chawla, Cross-modal retrieval: a pairwise classification approach, in: Proceedings of the 2015 SIAM International Conference on Data Mining, SIAM, 2015, pp. 199\u2013207."},{"key":"10.1016\/j.ins.2020.06.032_b0130","unstructured":"Alexis Mignon, Fr\u00e9d\u00e9ric Jurie, Cmml: a new metric learning approach for cross modal matching, in: Asian Conference on Computer Vision, 2012, pp. 14\u2013pages."},{"key":"10.1016\/j.ins.2020.06.032_b0135","unstructured":"Donglin Niu, Jennifer G. Dy, Michael I. Jordan, Multiple non-redundant spectral clustering views, in: Proceedings of the 27th International Conference on Machine Learning (ICML-10), 2010, pp. 831\u2013838."},{"issue":"7","key":"10.1016\/j.ins.2020.06.032_b0140","doi-asserted-by":"crossref","first-page":"1340","DOI":"10.1109\/TPAMI.2013.180","article-title":"Iterative discovery of multiple alternativeclustering views","volume":"36","author":"Niu","year":"2014","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.ins.2020.06.032_b0145","doi-asserted-by":"crossref","unstructured":"Yash Patel, Lluis Gomez, Mar\u00e7al Rusi\u00f1ol, Dimosthenis Karatzas, C.V. Jawahar, Self-supervised visual representations for cross-modal retrieval, in: Proceedings of the 2019 on International Conference on Multimedia Retrieval, ACM, 2019, pp. 182\u2013186.","DOI":"10.1145\/3323873.3325035"},{"key":"10.1016\/j.ins.2020.06.032_b0150","doi-asserted-by":"crossref","unstructured":"Jose Costa Pereira, Emanuele Coviello, Gabriel Doyle, Nikhil Rasiwasia, Gert R.G. Lanckriet, Roger Levy, Nuno Vasconcelos, On the role of correlation and abstraction in cross-modal multimedia retrieval. IEEE Trans. Pattern Anal. Mach. Intell. 36 (3) (2014) 521\u2013535.","DOI":"10.1109\/TPAMI.2013.142"},{"key":"10.1016\/j.ins.2020.06.032_b0155","doi-asserted-by":"crossref","unstructured":"Jose Costa Pereira, Nuno Vasconcelos, On the regularization of image semantics by modal expansion, in: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), IEEE, 2012, pp. 3093\u20133099.","DOI":"10.1109\/CVPR.2012.6248041"},{"key":"10.1016\/j.ins.2020.06.032_b0160","doi-asserted-by":"crossref","unstructured":"N. Rasiwasia, J. Costa Pereira, E. Coviello, G. Doyle, G.R.G. Lanckriet, R. Levy, N. Vasconcelos, A new approach to cross-modal multimedia retrieval, in: ACM International Conference on Multimedia, 2010, pp. 251\u2013260.","DOI":"10.1145\/1873951.1873987"},{"key":"10.1016\/j.ins.2020.06.032_b0165","doi-asserted-by":"crossref","unstructured":"Nawid Sayed, Biagio Brattoli, Bj\u00f6rn Ommer, Cross and learn: Cross-modal self-supervision, in: German Conference on Pattern Recognition, Springer, 2018, pp. 228\u2013243.","DOI":"10.1007\/978-3-030-12939-2_17"},{"key":"10.1016\/j.ins.2020.06.032_b0170","doi-asserted-by":"crossref","unstructured":"Abhishek Sharma, David W. Jacobs, Bypassing synthesis: Pls for face recognition with pose, low-resolution and sketch, in: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), IEEE, 2011, pp. 593\u2013600.","DOI":"10.1109\/CVPR.2011.5995350"},{"key":"10.1016\/j.ins.2020.06.032_b0175","doi-asserted-by":"crossref","unstructured":"Abhishek Sharma, Abhishek Kumar, Hal Daume, David W. Jacobs, Generalized multiview analysis: a discriminative latent space, in: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), IEEE, 2012, pp. 2160\u20132167.","DOI":"10.1109\/CVPR.2012.6247923"},{"key":"10.1016\/j.ins.2020.06.032_b0180","doi-asserted-by":"crossref","unstructured":"Guoli Song, Shuhui Wang, Qingming Huang, Qi Tian, Similarity gaussian process latent variable model for multi-modal data analysis, in: Proceedings of the IEEE International Conference on Computer Vision, 2015, pp. 4050\u20134058.","DOI":"10.1109\/ICCV.2015.461"},{"key":"10.1016\/j.ins.2020.06.032_b0185","unstructured":"Jingkuan Song, Yuyu Guo, Lianli Gao, Xuelong Li, Alan Hanjalic, Heng Tao Shen, From deterministic to generative: Multimodal stochastic rnns for video captioning, IEEE Trans. Neural Networks Learn. Syst. 30(10) (2018) 3047\u20133058."},{"key":"10.1016\/j.ins.2020.06.032_b0190","doi-asserted-by":"crossref","unstructured":"Jingkuan Song, Yang Yang, Yi Yang, Zi Huang, Heng Tao Shen, Inter-media hashing for large-scale retrieval from heterogeneous data sources, in: Proceedings of the 2013 ACM SIGMOD International Conference on Management of Data, ACM, 2013, pp. 785\u2013796.","DOI":"10.1145\/2463676.2465274"},{"key":"10.1016\/j.ins.2020.06.032_b0195","doi-asserted-by":"crossref","unstructured":"Chunwei Tian, Lunke Fei, Wenxian Zheng, Yong Xu, Wangmeng Zuo, Chia-Wen Lin, Deep learning on image denoising: an overview, 2019, arXiv preprint arXiv:1912.13171.","DOI":"10.1016\/j.neunet.2020.07.025"},{"key":"10.1016\/j.ins.2020.06.032_b0200","doi-asserted-by":"crossref","unstructured":"Chunwei Tian, Yong Xu, Zuoyong Li, Wangmeng Zuo, Lunke Fei, Hong Liu, Attention-guided cnn for image denoising. Neural Networks (2020).","DOI":"10.1016\/j.neunet.2019.12.024"},{"key":"10.1016\/j.ins.2020.06.032_b0205","doi-asserted-by":"crossref","first-page":"461","DOI":"10.1016\/j.neunet.2019.08.022","article-title":"Image denoising using deep cnn with batch renormalization","volume":"121","author":"Tian","year":"2020","journal-title":"Neural Networks"},{"key":"10.1016\/j.ins.2020.06.032_b0210","doi-asserted-by":"crossref","unstructured":"Kaiye Wang, Ran He, Wei Wang, Liang Wang, Tieniu Tan, Learning coupled feature spaces for cross-modal matching, in: Proceedings of the IEEE International Conference on Computer Vision, 2013, pp. 2088\u20132095.","DOI":"10.1109\/ICCV.2013.261"},{"key":"10.1016\/j.ins.2020.06.032_b0215","unstructured":"Weiran Wang, Raman Arora, Karen Livescu, Jeff Bilmes, On deep multi-view representation learning, in: Proceedings of the 32nd International Conference on Machine Learning, 2015, pp. 1083\u20131092."},{"key":"10.1016\/j.ins.2020.06.032_b0220","doi-asserted-by":"crossref","unstructured":"Weiran Wang, Raman Arora, Karen Livescu, Jeff A. Bilmes, Unsupervised learning of acoustic features via deep canonical correlation analysis, in: 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, 2015, pp. 4590\u20134594.","DOI":"10.1109\/ICASSP.2015.7178840"},{"key":"10.1016\/j.ins.2020.06.032_b0225","unstructured":"David Weenink, Canonical correlation analysis, in: Proceedings of the Institute of Phonetic Sciences of the University of Amsterdam, vol. 25, Citeseer, 2003, pp. 81\u201399."},{"issue":"11","key":"10.1016\/j.ins.2020.06.032_b0230","doi-asserted-by":"crossref","first-page":"5292","DOI":"10.1109\/TNNLS.2018.2793863","article-title":"Shared predictive cross-modal deep quantization","volume":"29","author":"Yang","year":"2018","journal-title":"IEEE Trans. Neural Networks Learn. Syst."},{"key":"10.1016\/j.ins.2020.06.032_b0235","doi-asserted-by":"crossref","unstructured":"Xiaohua Zhai, Yuxin Peng, Jianguo Xiao, Heterogeneous metric learning with joint graph regularization for cross-media retrieval, in: AAAI, 2013.","DOI":"10.1609\/aaai.v27i1.8464"},{"key":"10.1016\/j.ins.2020.06.032_b0240","doi-asserted-by":"crossref","unstructured":"Jian Zhang, Yuxin Peng, Mingkuan Yuan, Unsupervised generative adversarial cross-modal hashing, in: Thirty-Second AAAI Conference on Artificial Intelligence, 2018.","DOI":"10.1609\/aaai.v32i1.11263"},{"key":"10.1016\/j.ins.2020.06.032_b0245","unstructured":"Guoxu Zhou, Andrzej Cichocki, Yu Zhang, Danilo P. Mandic, Group component analysis for multiblock data: Common and individual feature extraction, IEEE Trans. Neural Networks Learn. Syst. 27 (11) (2016) 2426\u20132439."}],"container-title":["Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0020025520306186?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0020025520306186?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2022,10,29]],"date-time":"2022-10-29T04:59:04Z","timestamp":1667019544000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0020025520306186"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,10]]},"references-count":49,"alternative-id":["S0020025520306186"],"URL":"https:\/\/doi.org\/10.1016\/j.ins.2020.06.032","relation":{},"ISSN":["0020-0255"],"issn-type":[{"value":"0020-0255","type":"print"}],"subject":[],"published":{"date-parts":[[2020,10]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Similarity and diversity induced paired projection for cross-modal retrieval","name":"articletitle","label":"Article Title"},{"value":"Information Sciences","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.ins.2020.06.032","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2020 Elsevier Inc. All rights reserved.","name":"copyright","label":"Copyright"}]}}