{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,12]],"date-time":"2024-09-12T10:10:09Z","timestamp":1726135809206},"publisher-location":"Cham","reference-count":29,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030855284"},{"type":"electronic","value":"9783030855291"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-85529-1_14","type":"book-chapter","created":{"date-parts":[[2021,9,19]],"date-time":"2021-09-19T23:04:55Z","timestamp":1632092695000},"page":"168-180","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Quantile Encoder: Tackling High Cardinality Categorical Features in Regression Problems"],"prefix":"10.1007","author":[{"given":"Carlos","family":"Mougan","sequence":"first","affiliation":[]},{"given":"David","family":"Masip","sequence":"additional","affiliation":[]},{"given":"Jordi","family":"Nin","sequence":"additional","affiliation":[]},{"given":"Oriol","family":"Pujol","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,9,20]]},"reference":[{"key":"14_CR1","unstructured":"Bruin, J.: newtest: command to compute new test @ONLINE (2011). https:\/\/stats.idre.ucla.edu\/stata\/ado\/analysis\/"},{"key":"14_CR2","unstructured":"Burkov, A.: Machine Learning Engineering, 1 edn. Kindle Direct Publishing (2020)"},{"key":"14_CR3","unstructured":"Carey, G.: Coding categorical variables (2003). http:\/\/psych.colorado.edu\/~carey\/Courses\/PSYC5741\/handouts\/Coding%20Categorical%20Variables%202006-03-03.pdf"},{"key":"14_CR4","doi-asserted-by":"publisher","unstructured":"Cestnik, B., Bratko, I.: On estimating probabilities in tree pruning. In: Kodratoff, Y. (ed.) EWSL 1991. LNCS, vol. 482, pp. 138\u2013150. Springer, Heidelberg (1991). https:\/\/doi.org\/10.1007\/BFb0017010","DOI":"10.1007\/BFb0017010"},{"key":"14_CR5","unstructured":"Charles, J.G.: School of Statistics, University of Minnesota: Stat 5101 Lecture slides (2020). https:\/\/www.stat.umn.edu\/geyer\/f11\/5101\/slides\/s4a.pdf"},{"key":"14_CR6","unstructured":"Masip, D., Mougan, C.: Quantile encoder experiments (2020). https:\/\/github.com\/david26694\/QE_experiments"},{"key":"14_CR7","unstructured":"Masip, D., Mougan, C.: Sktools:tools to extend sklearn, feature engineering based transformers (2020). https:\/\/sktools.readthedocs.io\/"},{"key":"14_CR8","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1038\/scientificamerican0577-119","volume":"236","author":"B Efron","year":"1977","unstructured":"Efron, B., Morris, C.: Stein\u2019s paradox in statistics. Sci. Am. 236, 119\u2013127 (1977). https:\/\/doi.org\/10.1038\/scientificamerican0577-119","journal-title":"Sci. Am."},{"key":"14_CR9","doi-asserted-by":"publisher","unstructured":"Gelman, A., Hill, J.: Data Analysis Using Regression and Multilevel\/Hierarchical Models. Analytical Methods for Social Research. Cambridge University Press, Cambridge (2006). https:\/\/doi.org\/10.1017\/CBO9780511790942","DOI":"10.1017\/CBO9780511790942"},{"key":"14_CR10","unstructured":"G\u00e9ron, A.: Hands-on machine learning with Scikit-Learn and TensorFlow : Concepts, Tools, and Techniques to Build Intelligent systems. O\u2019Reilly Media, Sebastopol (2017)"},{"key":"14_CR11","doi-asserted-by":"crossref","unstructured":"Jaynes, E.T.: Probability Theory: The Logic of Science. Cambridge University Press, Cambridge (2003)","DOI":"10.1017\/CBO9780511790423"},{"key":"14_CR12","unstructured":"Kaggle: Kickstarter projects (2020). https:\/\/www.kaggle.com\/kemical\/kickstarter-projects. [Online; accessed 20-October-2020]"},{"key":"14_CR13","unstructured":"CMS.gov Centers for Medicare & Medicaid Services: Medical payments dataset (2020). Data retrieved from Center for Medicare and Medicaid Services, https:\/\/www.cms.gov\/OpenPayments\/Explore-the-Data\/Dataset-Downloads"},{"key":"14_CR14","unstructured":"The Turing Way Community: The Turing Way: A Handbook for Reproducible Data Science (2019). https:\/\/doi.org\/10.5281\/zenodo.3233986"},{"issue":"1","key":"14_CR15","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1145\/507533.507538","volume":"3","author":"D Micci-Barreca","year":"2001","unstructured":"Micci-Barreca, D.: A preprocessing scheme for high-cardinality categorical attributes in classification and prediction problems. SIGKDD Explor. Newsl. 3(1), 27\u201332 (2001)","journal-title":"SIGKDD Explor. Newsl."},{"issue":"381","key":"14_CR16","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1080\/01621459.1983.10477920","volume":"78","author":"CN Morris","year":"1983","unstructured":"Morris, C.N.: Parametric empirical bayes inference: theory and applications. J. Am. Stat. Assoc. 78(381), 47\u201355 (1983)","journal-title":"J. Am. Stat. Assoc."},{"key":"14_CR17","unstructured":"Pargent, F., Bischl, B., Thomas, J.: A benchmark experiment on how to encode categorical features in predictive modeling. Master\u2019s thesis, School of Statistics (2019)"},{"key":"14_CR18","unstructured":"Prokhorenkova, L., Gusev, G., Vorobev, A., Veronika Dorogush, A., Gulin, A.: CatBoost: unbiased boosting with categorical features. arXiv e-prints arXiv:1706.09516 (2017)"},{"key":"14_CR19","unstructured":"Slakey, A., Salas, D., Schamroth, Y.: Encoding categorical variables with conjugate bayesian models for WeWork lead scoring engine (2019)"},{"key":"14_CR20","unstructured":"Slakey, A., Salas, D., Schamroth, Y.: Encoding categorical variables with conjugate bayesian models for WeWork lead scoring engine. arXiv e-prints arXiv:1904.13001 (2019)"},{"key":"14_CR21","unstructured":"Stackoverflow: Developer survey results 2018 (2018). https:\/\/insights.stackoverflow.com\/survey\/2018\/"},{"key":"14_CR22","unstructured":"Stackoverflow: Developer survey results 2019 (2019). https:\/\/insights.stackoverflow.com\/survey\/2019\/"},{"key":"14_CR23","doi-asserted-by":"publisher","unstructured":"Tutz, G.: Regression for Categorical Data. Cambridge Series in Statistical and Probabilistic Mathematics. Cambridge University Press, Cambridge (2011). https:\/\/doi.org\/10.1017\/CBO9780511842061","DOI":"10.1017\/CBO9780511842061"},{"key":"14_CR24","first-page":"589","volume":"16","author":"L Wang","year":"2006","unstructured":"Wang, L., Zhu, J., Zou, H.: The doubly regularized support vector machine. Statistica Sinica 16, 589\u2013615 (2006)","journal-title":"Statistica Sinica"},{"key":"14_CR25","unstructured":"Wikipedia contributors: Additive smoothing \u2013 Wikipedia, the free encyclopedia (2020). https:\/\/en.wikipedia.org\/w\/index.php?title=Additive_smoothing&oldid=937083796"},{"key":"14_CR26","doi-asserted-by":"publisher","unstructured":"Wilcoxon, F.: Individual comparisons by ranking methods. In: Kotz, S., Johnson, N.L. (eds.) Breakthroughs in Statistics. Springer Series in Statistics (Perspectives in Statistics). Springer, New York (1992). https:\/\/doi.org\/10.1007\/978-1-4612-4380-9_16","DOI":"10.1007\/978-1-4612-4380-9_16"},{"key":"14_CR27","unstructured":"Will McGinnis: category encoders :a library of sklearn compatible categorical variable encoders (2020). https:\/\/contrib.scikit-learn.org\/"},{"issue":"1","key":"14_CR28","doi-asserted-by":"publisher","first-page":"320","DOI":"10.1177\/0081175015570097","volume":"45","author":"X Zhou","year":"2015","unstructured":"Zhou, X.: Shrinkage estimation of log-odds ratios for comparing mobility tables. Sociol. Methodol. 45(1), 320\u2013356 (2015)","journal-title":"Sociol. Methodol."},{"key":"14_CR29","doi-asserted-by":"crossref","unstructured":"Zou, H., Hastie, T.: Regularization and variable selection via the elastic net. J. R. Stat. Soc. Ser. B 67, 301\u2013320 (2005)","DOI":"10.1111\/j.1467-9868.2005.00503.x"}],"container-title":["Lecture Notes in Computer Science","Modeling Decisions for Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-85529-1_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T19:20:14Z","timestamp":1710357614000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-85529-1_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030855284","9783030855291"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-85529-1_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"20 September 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MDAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Modeling Decisions for Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ume\u00e5","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Sweden","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mdai2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/mdai.cat\/mdai2021\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"None","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"50","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"20","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"40% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually. There were also 3 invited papers.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}