{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T23:48:41Z","timestamp":1743119321690,"version":"3.40.3"},"publisher-location":"Cham","reference-count":47,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031703805"},{"type":"electronic","value":"9783031703812"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70381-2_13","type":"book-chapter","created":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T10:01:54Z","timestamp":1725184914000},"page":"199-214","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Boosting Protein Language Models with\u00a0Negative Sample Mining"],"prefix":"10.1007","author":[{"given":"Yaoyao","family":"Xu","sequence":"first","affiliation":[]},{"given":"Xinjian","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Xiaozhuang","family":"Song","sequence":"additional","affiliation":[]},{"given":"Benyou","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Tianshu","family":"Yu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,22]]},"reference":[{"issue":"12","key":"13_CR1","doi-asserted-by":"publisher","first-page":"1315","DOI":"10.1038\/s41592-019-0598-1","volume":"16","author":"EC Alley","year":"2019","unstructured":"Alley, E.C., Khimulya, G., Biswas, S., AlQuraishi, M., Church, G.M.: Unified rational protein engineering with sequence-based deep representation learning. Nat. Methods 16(12), 1315\u20131322 (2019)","journal-title":"Nat. Methods"},{"issue":"6557","key":"13_CR2","doi-asserted-by":"publisher","first-page":"871","DOI":"10.1126\/science.abj8754","volume":"373","author":"M Baek","year":"2021","unstructured":"Baek, M., et al.: Accurate prediction of protein structures and interactions using a three-track neural network. Science 373(6557), 871\u2013876 (2021)","journal-title":"Science"},{"issue":"1","key":"13_CR3","doi-asserted-by":"publisher","first-page":"304","DOI":"10.1093\/nar\/28.1.304","volume":"28","author":"A Bairoch","year":"2000","unstructured":"Bairoch, A.: The enzyme database in 2000. Nucleic Acids Res. 28(1), 304\u2013305 (2000)","journal-title":"Nucleic Acids Res."},{"issue":"6","key":"13_CR4","doi-asserted-by":"publisher","first-page":"654","DOI":"10.1016\/j.cels.2021.05.017","volume":"12","author":"T Bepler","year":"2021","unstructured":"Bepler, T., Berger, B.: Learning the protein language: evolution, structure, and function. Cell Syst. 12(6), 654\u2013669 (2021)","journal-title":"Cell Syst."},{"key":"13_CR5","doi-asserted-by":"crossref","unstructured":"Bonetta, R., Valentino, G.: Machine learning techniques for protein function prediction. Proteins: Struct. Function Bioinform. 88(3), 397\u2013413 (2020)","DOI":"10.1002\/prot.25832"},{"key":"13_CR6","doi-asserted-by":"crossref","unstructured":"Burley, S.K., Berman, H.M., Kleywegt, G.J., Markley, J.L., Nakamura, H., Velankar, S.: Protein data bank (pdb): the single global macromolecular structure archive. Protein crystallography: methods and protocols, pp. 627\u2013641 (2017)","DOI":"10.1007\/978-1-4939-7000-1_26"},{"key":"13_CR7","doi-asserted-by":"crossref","unstructured":"Chen, D., Tian, X., Zhou, B., Gao, J., et\u00a0al.: Profold: protein fold classification with additional structural features and a novel ensemble classifier. BioMed Res. Int. 2016 (2016)","DOI":"10.1155\/2016\/6802832"},{"key":"13_CR8","doi-asserted-by":"crossref","unstructured":"Dallago, C., et al.: Flip: benchmark tasks in fitness landscape inference for proteins. Advances in Neural Information Processing Systems, pp. 2021\u201311 (2021)","DOI":"10.1101\/2021.11.09.467890"},{"issue":"4","key":"13_CR9","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1038\/nrg3414","volume":"14","author":"D De Juan","year":"2013","unstructured":"De Juan, D., Pazos, F., Valencia, A.: Emerging methods in protein co-evolution. Nat. Rev. Genet. 14(4), 249\u2013261 (2013)","journal-title":"Nat. Rev. Genet."},{"key":"13_CR10","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"13_CR11","doi-asserted-by":"crossref","unstructured":"Elnaggar, A., et\u00a0al.: Prottrans: toward understanding the language of life through self-supervised learning. TPAMI 44(10) (2021)","DOI":"10.1109\/TPAMI.2021.3095381"},{"key":"13_CR12","doi-asserted-by":"crossref","unstructured":"Elnaggar, A., et al.: Prottrans: towards cracking the language of lifes code through self-supervised deep learning and high performance computing. TPAMI, 1 (2021)","DOI":"10.1101\/2020.07.12.199554"},{"key":"13_CR13","doi-asserted-by":"crossref","unstructured":"Evans, R., et\u00a0al.: Protein complex prediction with alphafold-multimer. biorxiv pp. 2021\u201310 (2021)","DOI":"10.1101\/2021.10.04.463034"},{"issue":"10","key":"13_CR14","doi-asserted-by":"publisher","first-page":"1087","DOI":"10.1038\/s42256-023-00721-6","volume":"5","author":"X Fang","year":"2023","unstructured":"Fang, X., et al.: A method for multiple-sequence-alignment-free protein structure prediction using a protein language model. Nature Mach. Intell. 5(10), 1087\u20131096 (2023)","journal-title":"Nature Mach. Intell."},{"issue":"6","key":"13_CR15","doi-asserted-by":"publisher","first-page":"521","DOI":"10.1038\/s42256-022-00499-z","volume":"4","author":"N Ferruz","year":"2022","unstructured":"Ferruz, N., H\u00f6cker, B.: Controllable protein design with language models. Nature Mach. Intell. 4(6), 521\u2013532 (2022)","journal-title":"Nature Mach. Intell."},{"issue":"1","key":"13_CR16","doi-asserted-by":"publisher","first-page":"3168","DOI":"10.1038\/s41467-021-23303-9","volume":"12","author":"V Gligorijevi\u0107","year":"2021","unstructured":"Gligorijevi\u0107, V., et al.: Structure-based protein function prediction using graph convolutional networks. Nat. Commun. 12(1), 3168 (2021)","journal-title":"Nat. Commun."},{"issue":"1","key":"13_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s12859-019-3220-8","volume":"20","author":"M Heinzinger","year":"2019","unstructured":"Heinzinger, M., et al.: Modeling aspects of the language of life through transfer-learning protein sequences. BMC Bioinform. 20(1), 1\u201317 (2019)","journal-title":"BMC Bioinform."},{"issue":"6526","key":"13_CR18","doi-asserted-by":"publisher","first-page":"284","DOI":"10.1126\/science.abd7331","volume":"371","author":"B Hie","year":"2021","unstructured":"Hie, B., Zhong, E.D., Berger, B., Bryson, B.: Learning the language of viral evolution and escape. Science 371(6526), 284\u2013288 (2021)","journal-title":"Science"},{"issue":"4","key":"13_CR19","doi-asserted-by":"publisher","first-page":"274","DOI":"10.1016\/j.cels.2022.01.003","volume":"13","author":"BL Hie","year":"2022","unstructured":"Hie, B.L., Yang, K.K., Kim, P.S.: Evolutionary velocity with protein language models predicts evolutionary dynamics of diverse proteins. Cell Syst. 13(4), 274\u2013285 (2022)","journal-title":"Cell Syst."},{"key":"13_CR20","doi-asserted-by":"crossref","unstructured":"Hsu, C., et al.: Learning inverse folding from millions of predicted structures. In: ICML, pp. 8946\u20138970. PMLR (2022)","DOI":"10.1101\/2022.04.10.487779"},{"key":"13_CR21","first-page":"38873","volume":"35","author":"M Hu","year":"2022","unstructured":"Hu, M., et al.: Exploring evolution-aware & -free protein language models as protein function predictors. Adv. Neural. Inf. Process. Syst. 35, 38873\u201338884 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"2","key":"13_CR22","doi-asserted-by":"publisher","first-page":"184","DOI":"10.1093\/bioinformatics\/btr638","volume":"28","author":"DT Jones","year":"2012","unstructured":"Jones, D.T., Buchan, D.W., Cozzetto, D., Pontil, M.: Psicov: precise structural contact prediction using sparse inverse covariance estimation on large multiple sequence alignments. Bioinformatics 28(2), 184\u2013190 (2012)","journal-title":"Bioinformatics"},{"issue":"7873","key":"13_CR23","doi-asserted-by":"publisher","first-page":"583","DOI":"10.1038\/s41586-021-03819-2","volume":"596","author":"J Jumper","year":"2021","unstructured":"Jumper, J., et al.: Highly accurate protein structure prediction with alphafold. Nature 596(7873), 583\u2013589 (2021)","journal-title":"Nature"},{"issue":"15","key":"13_CR24","doi-asserted-by":"publisher","first-page":"2605","DOI":"10.1093\/bioinformatics\/bty166","volume":"34","author":"S Khurana","year":"2018","unstructured":"Khurana, S., Rawi, R., Kunji, K., Chuang, G.Y., Bensmail, H., Mall, R.: Deepsol: a deep learning framework for sequence-based protein solubility prediction. Bioinformatics 34(15), 2605\u20132613 (2018)","journal-title":"Bioinformatics"},{"key":"13_CR25","unstructured":"Lin, Z., et\u00a0al.: Language models of protein sequences at the scale of evolution enable accurate structure prediction. Science (2023)"},{"issue":"6637","key":"13_CR26","doi-asserted-by":"publisher","first-page":"1123","DOI":"10.1126\/science.ade2574","volume":"379","author":"Z Lin","year":"2023","unstructured":"Lin, Z., et al.: Evolutionary-scale prediction of atomic-level protein structure with a language model. Science 379(6637), 1123\u20131130 (2023)","journal-title":"Science"},{"issue":"12","key":"13_CR27","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0028766","volume":"6","author":"DS Marks","year":"2011","unstructured":"Marks, D.S., et al.: Protein 3d structure computed from evolutionary sequence variation. PLoS ONE 6(12), e28766 (2011)","journal-title":"PLoS ONE"},{"key":"13_CR28","doi-asserted-by":"crossref","unstructured":"Meng, Q., Guo, F., Tang, J.: Improved structure-related prediction for insufficient homologous proteins using msa enhancement and pre-trained language model. Briefings Bioinform. 24(4), bbad217 (2023)","DOI":"10.1093\/bib\/bbad217"},{"issue":"8","key":"13_CR29","doi-asserted-by":"publisher","first-page":"3389","DOI":"10.1091\/mbc.e02-11-0704","volume":"14","author":"A Rajagopal","year":"2003","unstructured":"Rajagopal, A., Simon, S.M.: Subcellular localization and activity of multidrug resistance proteins. Mol. Biol. Cell 14(8), 3389\u20133399 (2003)","journal-title":"Mol. Biol. Cell"},{"key":"13_CR30","doi-asserted-by":"crossref","unstructured":"Rao, J., He, H., Lin, J.: Noise-contrastive estimation for answer selection with deep neural networks. In: Proceedings of the 25th ACM International on Conference on Information and Knowledge Management, pp. 1913\u20131916 (2016)","DOI":"10.1145\/2983323.2983872"},{"key":"13_CR31","doi-asserted-by":"crossref","unstructured":"Rao, R., et al.: Evaluating protein transfer learning with tape. Advances in neural information processing systems 32 (2019)","DOI":"10.1101\/676825"},{"key":"13_CR32","doi-asserted-by":"crossref","unstructured":"Rao, R., Meier, J., Sercu, T., Ovchinnikov, S., Rives, A.: Transformer protein language models are unsupervised structure learners. Biorxiv, pp. 2020\u201312 (2020)","DOI":"10.1101\/2020.12.15.422761"},{"key":"13_CR33","doi-asserted-by":"crossref","unstructured":"Rao, R.M., et al.: Msa transformer. In: ICML, pp. 8844\u20138856. PMLR (2021)","DOI":"10.1101\/2021.02.12.430858"},{"issue":"15","key":"13_CR34","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2016239118","volume":"118","author":"A Rives","year":"2021","unstructured":"Rives, A., et al.: Biological structure and function emerge from scaling unsupervised learning to 250 million protein sequences. Proc. Natl. Acad. Sci. 118(15), e2016239118 (2021)","journal-title":"Proc. Natl. Acad. Sci."},{"key":"13_CR35","doi-asserted-by":"crossref","unstructured":"Shrivastava, A., Gupta, A., Girshick, R.: Training region-based object detectors with online hard example mining. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 761\u2013769 (2016)","DOI":"10.1109\/CVPR.2016.89"},{"key":"13_CR36","doi-asserted-by":"crossref","unstructured":"Strokach, A., Becerra, D., Corbi-Verge, C., Perez-Riba, A., Kim, P.M.: Fast and flexible protein design using deep graph neural networks. Cell Syst. 11(4) (2020)","DOI":"10.1016\/j.cels.2020.08.016"},{"issue":"7972","key":"13_CR37","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1038\/s41586-023-06221-2","volume":"620","author":"H Wang","year":"2023","unstructured":"Wang, H., et al.: Scientific discovery in the age of artificial intelligence. Nature 620(7972), 47\u201360 (2023)","journal-title":"Nature"},{"key":"13_CR38","doi-asserted-by":"crossref","unstructured":"Wang, X., Xu, Y., He, X., Cao, Y., Wang, M., Chua, T.S.: Reinforced negative sampling over knowledge graph for recommendation. In: WWW, pp. 99\u2013109 (2020)","DOI":"10.1145\/3366423.3380098"},{"key":"13_CR39","doi-asserted-by":"crossref","unstructured":"Wang, Y., Song, J., Dai, Q., Duan, X.: Hierarchical negative sampling based graph contrastive learning approach for drug-disease association prediction. IEEE J. Biomed. Health Inform. (2024)","DOI":"10.1109\/JBHI.2024.3360437"},{"key":"13_CR40","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1016\/j.artmed.2017.03.001","volume":"83","author":"L Wei","year":"2017","unstructured":"Wei, L., Xing, P., Zeng, J., Chen, J., Su, R., Guo, F.: Improved prediction of protein-protein interactions using novel negative samples, features, and an ensemble classifier. Artif. Intell. Med. 83, 67\u201374 (2017)","journal-title":"Artif. Intell. Med."},{"key":"13_CR41","unstructured":"Xu, M., Yuan, X., Miret, S., Tang, J.: Protst: multi-modality learning of protein sequences and biomedical texts. ICML (2023)"},{"key":"13_CR42","unstructured":"Xu, M., et al.: Peer: a comprehensive and multi-task benchmark for protein sequence understanding. NIPS (2022)"},{"key":"13_CR43","doi-asserted-by":"crossref","unstructured":"Ying, R., He, R., Chen, K., Eksombatchai, P., Hamilton, W.L., Leskovec, J.: Graph convolutional neural networks for web-scale recommender systems. In: SIGKDD, pp. 974\u2013983 (2018)","DOI":"10.1145\/3219819.3219890"},{"key":"13_CR44","doi-asserted-by":"crossref","unstructured":"Yu, T., Cui, H., Li, J.C., Luo, Y., Jiang, G., Zhao, H.: Enzyme function prediction using contrastive learning. Science (2023)","DOI":"10.1126\/science.adf2465"},{"key":"13_CR45","doi-asserted-by":"crossref","unstructured":"Zhang, Z., et al.: Protein language models learn evolutionary statistics of interacting sequence motifs. bioRxiv, pp. 2024\u201301 (2024)","DOI":"10.1101\/2024.01.30.577970"},{"key":"13_CR46","unstructured":"Zhang, Z., et al.: Protein representation learning by geometric structure pretraining. In: ICLR (2023)"},{"key":"13_CR47","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Deng, Y., Xue, D., Zhou, Y., Ye, F., Gu, Q.: Structure-informed language models are protein designers. In: ICML, pp. 2023\u201302 (2023)","DOI":"10.1101\/2023.02.03.526917"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Applied Data Science Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70381-2_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T10:09:38Z","timestamp":1725185378000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70381-2_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031703805","9783031703812"],"references-count":47,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70381-2_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"22 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vilnius","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lithuania","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2024.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}