{"id":"https://openalex.org/W2914436728","doi":"https://doi.org/10.1145/3308558.3313602","title":"Learn2Clean: Optimizing the Sequence of Tasks for Web Data Preparation","display_name":"Learn2Clean: Optimizing the Sequence of Tasks for Web Data Preparation","publication_year":2019,"publication_date":"2019-05-13","ids":{"openalex":"https://openalex.org/W2914436728","doi":"https://doi.org/10.1145/3308558.3313602","mag":"2914436728"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3308558.3313602","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091872345","display_name":"Laure Berti\u2010\u00c9quille","orcid":"https://orcid.org/0000-0002-8046-0570"},"institutions":[{"id":"https://openalex.org/I4210166444","display_name":"Institut de Recherche pour le D\u00e9veloppement","ror":"https://ror.org/05q3vnk25","country_code":"FR","type":"government","lineage":["https://openalex.org/I2802818602","https://openalex.org/I4210090127","https://openalex.org/I4210131494","https://openalex.org/I4210166444"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Laure Berti-Equille","raw_affiliation_strings":["Institute of Research for Development (IRD), France, France"],"affiliations":[{"raw_affiliation_string":"Institute of Research for Development (IRD), France, France","institution_ids":["https://openalex.org/I4210166444"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5091872345"],"corresponding_institution_ids":["https://openalex.org/I4210166444"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":36,"citation_normalized_percentile":{"value":0.999397,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"2580","last_page":"2586"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality Assessment and Improvement","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality Assessment and Improvement","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Techniques for Data Analysis and Machine Learning","score":0.993,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Learning with Noisy Labels in Machine Learning","score":0.9862,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-cleaning","display_name":"Data Cleaning","score":0.688094},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.6576973},{"id":"https://openalex.org/keywords/meta-learning","display_name":"Meta-Learning","score":0.524553},{"id":"https://openalex.org/keywords/robust-learning","display_name":"Robust Learning","score":0.501005},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.47440243}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8387705},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.70403075},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6701423},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.6576973},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.57890904},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.47440243},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.46572492},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.45603833},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.40381366},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3963666},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07990053},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0752044},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3308558.3313602","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://hal.archives-ouvertes.fr/hal-02197265","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.47,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":21,"referenced_works":["https://openalex.org/W1533198000","https://openalex.org/W1973137455","https://openalex.org/W2001085207","https://openalex.org/W2017337590","https://openalex.org/W2048745453","https://openalex.org/W2071049875","https://openalex.org/W2086465016","https://openalex.org/W2090859349","https://openalex.org/W2115098571","https://openalex.org/W2123329666","https://openalex.org/W2132862423","https://openalex.org/W2142284002","https://openalex.org/W2144182447","https://openalex.org/W2165528679","https://openalex.org/W2182361439","https://openalex.org/W2315227351","https://openalex.org/W2437617937","https://openalex.org/W2757938417","https://openalex.org/W2793058134","https://openalex.org/W3011120880","https://openalex.org/W4213306721"],"related_works":["https://openalex.org/W4248881655","https://openalex.org/W3092506759","https://openalex.org/W3010890513","https://openalex.org/W2989490741","https://openalex.org/W2482165163","https://openalex.org/W2390914021","https://openalex.org/W2389417819","https://openalex.org/W2367545121","https://openalex.org/W138569904","https://openalex.org/W120741642"],"abstract_inverted_index":{"Data":[0],"cleaning":[1,59,69],"and":[2,16,27,39,60],"preparation":[3],"has":[4],"been":[5],"a":[6,24,28,33],"long-standing":[7],"challenge":[8],"in":[9],"data":[10,36,41,58],"science":[11],"to":[12,46],"avoid":[13],"incorrect":[14],"results":[15],"misleading":[17],"conclusions":[18],"obtained":[19],"from":[20],"dirty":[21],"data.":[22],"For":[23],"given":[25,29],"dataset":[26],"machine":[30,62],"learning-based":[31],"task,":[32],"plethora":[34],"of":[35],"preprocessing":[37],"techniques":[38],"alternative":[40],"curation":[42],"strategies":[43],"may":[44],"lead":[45],"dramatically":[47],"different":[48],"outputs":[49],"with":[50],"unequal":[51],"quality":[52],"performance.":[53],"Most":[54],"current":[55],"work":[56],"on":[57,66],"automated":[61],"learning,":[63],"however,":[64],"focus":[65],"developing":[67],"either":[68],"algorithms":[70],"or":[71,74],"user-guided":[72],"systems":[73]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2914436728","counts_by_year":[{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":2}],"updated_date":"2024-12-04T09:29:02.809480","created_date":"2019-02-21"}