{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T23:57:48Z","timestamp":1726099068368},"publisher-location":"Cham","reference-count":31,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030602383"},{"type":"electronic","value":"9783030602390"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-60239-0_3","type":"book-chapter","created":{"date-parts":[[2020,9,29]],"date-time":"2020-09-29T05:03:14Z","timestamp":1601355794000},"page":"32-46","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["DAFEE: A Scalable Distributed Automatic Feature Engineering Algorithm for Relational Datasets"],"prefix":"10.1007","author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-6547-005X","authenticated-orcid":false,"given":"Wenqian","family":"Zhao","sequence":"first","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0003-2075-2106","authenticated-orcid":false,"given":"Xiangxiang","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0003-4576-0524","authenticated-orcid":false,"given":"Guoping","family":"Rong","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-4429-363X","authenticated-orcid":false,"given":"Mufeng","family":"Lin","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0003-2858-9104","authenticated-orcid":false,"given":"Chen","family":"Lin","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0001-9127-168X","authenticated-orcid":false,"given":"Yifan","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,9,29]]},"reference":[{"key":"3_CR1","doi-asserted-by":"crossref","unstructured":"Armbrust, M., et al.: Spark SQL: relational data processing in spark. In: Proceedings of the 2015 ACM SIGMOD International Conference on Management of Data, pp. 1383\u20131394 (2015)","DOI":"10.1145\/2723372.2742797"},{"issue":"4","key":"3_CR2","doi-asserted-by":"publisher","first-page":"537","DOI":"10.1109\/72.298224","volume":"5","author":"R Battiti","year":"1994","unstructured":"Battiti, R.: Using mutual information for selecting features in supervised neural net learning. IEEE Trans. Neural Netw. 5(4), 537\u2013550 (1994)","journal-title":"IEEE Trans. Neural Netw."},{"key":"3_CR3","doi-asserted-by":"crossref","unstructured":"Chen, T., Guestrin, C.: XGBoost: a scalable tree boosting system. In: Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 785\u2013794 (2016)","DOI":"10.1145\/2939672.2939785"},{"key":"3_CR4","volume-title":"Statistics and Data Analysis in Geology","author":"JC Davis","year":"1986","unstructured":"Davis, J.C., Sampson, R.J.: Statistics and Data Analysis in Geology, vol. 646. Wiley, New York (1986)"},{"key":"3_CR5","doi-asserted-by":"publisher","first-page":"176","DOI":"10.1016\/j.ins.2011.11.039","volume":"189","author":"O Dor","year":"2012","unstructured":"Dor, O., Reich, Y.: Strengthening learning algorithms by feature discovery. Inf. Sci. 189, 176\u2013190 (2012)","journal-title":"Inf. Sci."},{"key":"3_CR6","volume-title":"Pattern Classification","author":"RO Duda","year":"2012","unstructured":"Duda, R.O., Hart, P.E., Stork, D.G.: Pattern Classification. Wiley, Hoboken (2012)"},{"issue":"1","key":"3_CR7","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1109\/TSMCB.2004.841426","volume":"35","author":"H Guo","year":"2005","unstructured":"Guo, H., Jack, L.B., Nandi, A.K.: Feature generation using genetic programming with application to fault classification. IEEE Trans. Syst. Man Cybern. Part B (Cybern.) 35(1), 89\u201399 (2005)","journal-title":"IEEE Trans. Syst. Man Cybern. Part B (Cybern.)"},{"issue":"1\u20133","key":"3_CR8","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1023\/A:1012487302797","volume":"46","author":"I Guyon","year":"2002","unstructured":"Guyon, I., Weston, J., Barnhill, S., Vapnik, V.: Gene selection for cancer classification using support vector machines. Mach. Learn. 46(1\u20133), 389\u2013422 (2002)","journal-title":"Mach. Learn."},{"key":"3_CR9","unstructured":"He, X., Cai, D., Niyogi, P.: Laplacian score for feature selection. In: Advances in Neural Information Processing Systems, pp. 507\u2013514 (2006)"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"He, X., et al.: Practical lessons from predicting clicks on ads at Facebook. In: Proceedings of the Eighth International Workshop on Data Mining for Online Advertising, pp. 1\u20139 (2014)","DOI":"10.1145\/2648584.2648589"},{"key":"3_CR11","unstructured":"Hutter, F., Hoos, H.H., Leyton-Brown, K.: Sequential model-based optimization for general algorithm configuration (extended version). Technical report TR-2010-10. Computer Science, University of British Columbia (2010)"},{"key":"3_CR12","doi-asserted-by":"crossref","unstructured":"Kanter, J.M., Veeramachaneni, K.: Deep feature synthesis: towards automating data science endeavors. In: 2015 IEEE International Conference on Data Science and Advanced Analytics (DSAA), pp. 1\u201310. IEEE (2015)","DOI":"10.1109\/DSAA.2015.7344858"},{"key":"3_CR13","doi-asserted-by":"crossref","unstructured":"Katz, G., Shin, E.C.R., Song, D.: ExploreKit: automatic feature generation and selection. In: 2016 IEEE 16th International Conference on Data Mining (ICDM), pp. 979\u2013984. IEEE (2016)","DOI":"10.1109\/ICDM.2016.0123"},{"key":"3_CR14","doi-asserted-by":"crossref","unstructured":"Kaul, A., Maheshwary, S., Pudi, V.: AutoLearn\u2014automated feature generation and selection. In: 2017 IEEE International Conference on Data Mining (ICDM), pp. 217\u2013226. IEEE (2017)","DOI":"10.1109\/ICDM.2017.31"},{"key":"3_CR15","unstructured":"Ke, G., et al.: LightGBM: a highly efficient gradient boosting decision tree. In: Advances in Neural Information Processing Systems, pp. 3146\u20133154 (2017)"},{"key":"3_CR16","doi-asserted-by":"crossref","unstructured":"Khurana, U., Samulowitz, H., Turaga, D.: Feature engineering for predictive modeling using reinforcement learning. In: Thirty-Second AAAI Conference on Artificial Intelligence (2018)","DOI":"10.1609\/aaai.v32i1.11678"},{"key":"3_CR17","doi-asserted-by":"crossref","unstructured":"Khurana, U., Turaga, D., Samulowitz, H., Parthasrathy, S.: Cognito: automated feature engineering for supervised learning. In: 2016 IEEE 16th International Conference on Data Mining Workshops (ICDMW), pp. 1304\u20131307. IEEE (2016)","DOI":"10.1109\/ICDMW.2016.0190"},{"key":"3_CR18","unstructured":"Lam, H.T., Minh, T.N., Sinn, M., Buesser, B., Wistuba, M.: Neural feature learning from relational database. arXiv preprint arXiv:1801.05372 (2018)"},{"key":"3_CR19","unstructured":"Lam, H.T., Thiebaut, J.M., Sinn, M., Chen, B., Mai, T., Alkan, O.: One button machine for automating feature engineering in relational databases. arXiv preprint arXiv:1706.00327 (2017)"},{"key":"3_CR20","doi-asserted-by":"crossref","unstructured":"Leather, H., Bonilla, E., O\u2019Boyle, M.: Automatic feature generation for machine learning based optimizing compilation. In: 2009 International Symposium on Code Generation and Optimization, pp. 81\u201391. IEEE (2009)","DOI":"10.1109\/CGO.2009.21"},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Lewis, D.D.: Feature selection and feature extraction for text categorization. In: Proceedings of the Workshop on Speech and Natural Language, pp. 212\u2013217. Association for Computational Linguistics (1992)","DOI":"10.3115\/1075527.1075574"},{"issue":"6","key":"3_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3136625","volume":"50","author":"J Li","year":"2017","unstructured":"Li, J., et al.: Feature selection: a data perspective. ACM Comput. Surv. (CSUR) 50(6), 1\u201345 (2017)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"3_CR23","unstructured":"Liu, H., Setiono, R.: Chi2: feature selection and discretization of numeric attributes. In: Proceedings of 7th IEEE International Conference on Tools with Artificial Intelligence, pp. 388\u2013391. IEEE (1995)"},{"issue":"1","key":"3_CR24","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1023\/A:1014046307775","volume":"49","author":"S Markovitch","year":"2002","unstructured":"Markovitch, S., Rosenstein, D.: Feature generation using general constructor functions. Mach. Learn. 49(1), 59\u201398 (2002)","journal-title":"Mach. Learn."},{"issue":"3","key":"3_CR25","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1109\/34.990133","volume":"24","author":"P Mitra","year":"2002","unstructured":"Mitra, P., Murthy, C., Pal, S.K.: Unsupervised feature selection using feature similarity. IEEE Trans. Pattern Anal. Mach. Intell. 24(3), 301\u2013312 (2002)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3_CR26","doi-asserted-by":"crossref","unstructured":"Nargesian, F., Samulowitz, H., Khurana, U., Khalil, E.B., Turaga, D.S.: Learning feature engineering for classification. In: IJCAI, pp. 2529\u20132535 (2017)","DOI":"10.24963\/ijcai.2017\/352"},{"key":"3_CR27","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1016\/j.patcog.2016.11.003","volume":"64","author":"R Sheikhpour","year":"2017","unstructured":"Sheikhpour, R., Sarram, M.A., Gharaghani, S., Chahooki, M.A.Z.: A survey on semi-supervised feature selection methods. Pattern Recogn. 64, 141\u2013158 (2017)","journal-title":"Pattern Recogn."},{"key":"3_CR28","unstructured":"Tang, J., Alelyani, S., Liu, H.: Feature selection for classification: a review. In: Data Classification: Algorithms and Applications, p. 37 (2014)"},{"issue":"1","key":"3_CR29","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s12293-015-0173-y","volume":"8","author":"B Tran","year":"2015","unstructured":"Tran, B., Xue, B., Zhang, M.: Genetic programming for feature construction and selection in classification on high-dimensional data. Memetic Comput. 8(1), 3\u201315 (2015). https:\/\/doi.org\/10.1007\/s12293-015-0173-y","journal-title":"Memetic Comput."},{"key":"3_CR30","unstructured":"Yuanfei, L., et al.: AutoCross: automatic feature crossing for tabular data in real-world applications. arXiv preprint arXiv:1904.12857 (2019)"},{"issue":"10\u201310","key":"3_CR31","first-page":"95","volume":"10","author":"M Zaharia","year":"2010","unstructured":"Zaharia, M., Chowdhury, M., Franklin, M.J., Shenker, S., Stoica, I., et al.: Spark: cluster computing with working sets. HotCloud 10(10\u201310), 95 (2010)","journal-title":"HotCloud"}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-60239-0_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,20]],"date-time":"2022-11-20T22:10:50Z","timestamp":1668982250000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-60239-0_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030602383","9783030602390"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-60239-0_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"29 September 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICA3PP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Algorithms and Architectures for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"New York, NY","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 October 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ica3pp2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.cloud-conf.net\/ica3pp2020\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"495","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"142","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"29% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"305","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"10","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}