{"id":"https://openalex.org/W3013319350","doi":"https://doi.org/10.1007/s10115-020-01467-y","title":"A scalable and effective rough set theory-based approach for big data pre-processing","display_name":"A scalable and effective rough set theory-based approach for big data pre-processing","publication_year":2020,"publication_date":"2020-05-02","ids":{"openalex":"https://openalex.org/W3013319350","doi":"https://doi.org/10.1007/s10115-020-01467-y","mag":"3013319350"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10115-020-01467-y","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10115-020-01467-y.pdf","source":{"id":"https://openalex.org/S81770430","display_name":"Knowledge and Information Systems","issn_l":"0219-3116","issn":["0219-3116","0219-1377"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310319900"],"host_organization_lineage_names":["Springer Nature","Springer Science+Business Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10115-020-01467-y.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018448962","display_name":"Zaineb Chelly Dagdia","orcid":"https://orcid.org/0000-0002-2551-6586"},"institutions":[{"id":"https://openalex.org/I16038530","display_name":"Aberystwyth University","ror":"https://ror.org/015m2p889","country_code":"GB","type":"education","lineage":["https://openalex.org/I16038530"]},{"id":"https://openalex.org/I4210140729","display_name":"Institut Sup\u00e9rieur de Gestion de Tunis","ror":"https://ror.org/04w9mdw91","country_code":"TN","type":"education","lineage":["https://openalex.org/I108714496","https://openalex.org/I4210140729"]}],"countries":["GB","TN"],"is_corresponding":false,"raw_author_name":"Zaineb Chelly\u00a0Dagdia","raw_affiliation_strings":["Aberystwyth University (Penglais, Aberystwyth, Ceredigion, SY23 3FL - United Kingdom)","ISG - Institut Sup\u00e9rieur de Gestion de Tunis [Tunis] (41 Avenue de la Libert\u00e9, cit\u00e9 Bouchoucha, le Bardo, 2000 - Tunisia)","MULTISPEECH - Speech Modeling for Facilitating Oral-Based Communication (France)"],"affiliations":[{"raw_affiliation_string":"MULTISPEECH - Speech Modeling for Facilitating Oral-Based Communication (France)","institution_ids":[]},{"raw_affiliation_string":"Aberystwyth University (Penglais, Aberystwyth, Ceredigion, SY23 3FL - United Kingdom)","institution_ids":["https://openalex.org/I16038530"]},{"raw_affiliation_string":"ISG - Institut Sup\u00e9rieur de Gestion de Tunis [Tunis] (41 Avenue de la Libert\u00e9, cit\u00e9 Bouchoucha, le Bardo, 2000 - Tunisia)","institution_ids":["https://openalex.org/I4210140729"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083499604","display_name":"Christine Zarges","orcid":"https://orcid.org/0000-0002-2829-4296"},"institutions":[{"id":"https://openalex.org/I16038530","display_name":"Aberystwyth University","ror":"https://ror.org/015m2p889","country_code":"GB","type":"education","lineage":["https://openalex.org/I16038530"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Christine Zarges","raw_affiliation_strings":["Aberystwyth University (Penglais, Aberystwyth, Ceredigion, SY23 3FL - United Kingdom)"],"affiliations":[{"raw_affiliation_string":"Aberystwyth University (Penglais, Aberystwyth, Ceredigion, SY23 3FL - United Kingdom)","institution_ids":["https://openalex.org/I16038530"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001516070","display_name":"Ga\u00ebl Beck","orcid":"https://orcid.org/0000-0002-5228-2666"},"institutions":[{"id":"https://openalex.org/I4210091279","display_name":"Universit\u00e9 Sorbonne Paris Nord","ror":"https://ror.org/0199hds37","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210091279"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Ga\u00ebl Beck","raw_affiliation_strings":["UP13 - Universit\u00e9 Paris 13 (France)"],"affiliations":[{"raw_affiliation_string":"UP13 - Universit\u00e9 Paris 13 (France)","institution_ids":["https://openalex.org/I4210091279"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023422312","display_name":"Mustapha Lebbah","orcid":"https://orcid.org/0000-0001-7245-6371"},"institutions":[{"id":"https://openalex.org/I4210091279","display_name":"Universit\u00e9 Sorbonne Paris Nord","ror":"https://ror.org/0199hds37","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210091279"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Mustapha Lebbah","raw_affiliation_strings":["UP13 - Universit\u00e9 Paris 13 (France)"],"affiliations":[{"raw_affiliation_string":"UP13 - Universit\u00e9 Paris 13 (France)","institution_ids":["https://openalex.org/I4210091279"]}]}],"institution_assertions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":2290,"currency":"EUR","value_usd":2890,"provenance":"doaj"},"apc_paid":{"value":2290,"currency":"EUR","value_usd":2890,"provenance":"doaj"},"fwci":1.941,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":19,"citation_normalized_percentile":{"value":0.999888,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":92},"biblio":{"volume":"62","issue":"8","first_page":"3321","last_page":"3386"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11063","display_name":"Rough Sets and Fuzzy Logic","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11063","display_name":"Rough Sets and Fuzzy Logic","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9962,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.984,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.47675666},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.4470496},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.4117804}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.79325175},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.713873},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.65264624},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.64998186},{"id":"https://openalex.org/C111012933","wikidata":"https://www.wikidata.org/wiki/Q3137210","display_name":"Rough set","level":2,"score":0.61176527},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5952258},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.49540102},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.47675666},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4691032},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.4470496},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.4117804},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3937465},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3892606},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.09787324},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":9,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10115-020-01467-y","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10115-020-01467-y.pdf","source":{"id":"https://openalex.org/S81770430","display_name":"Knowledge and Information Systems","issn_l":"0219-3116","issn":["0219-3116","0219-1377"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310319900"],"host_organization_lineage_names":["Springer Nature","Springer Science+Business Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://inria.hal.science/hal-02880626","pdf_url":"https://hal.science/hal-04456307/document","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://hal.inria.fr/hal-02880626","pdf_url":"https://inria.hal.science/hal-02880626/document","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":false,"landing_page_url":"https://hal.science/hal-04456307","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://pure.aber.ac.uk/ws/files/37252362/ChellyDagdia2020_Article_AScalableAndEffectiveRoughSetT.pdf","pdf_url":"http://pure.aber.ac.uk/ws/files/37252362/ChellyDagdia2020_Article_AScalableAndEffectiveRoughSetT.pdf","source":{"id":"https://openalex.org/S4306401660","display_name":"Aberystwyth Research portal (Aberystwyth University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I16038530","host_organization_name":"Aberystwyth University","host_organization_lineage":["https://openalex.org/I16038530"],"host_organization_lineage_names":["Aberystwyth University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://pure.aber.ac.uk/portal/en/publications/a-scalable-and-effective-rough-set-theory-based-approach-for-big-data-preprocessing(4dc0c124-06c8-4da4-97c3-a78ac75fca92).html","pdf_url":"https://pure.aber.ac.uk/portal/files/37252362/ChellyDagdia2020_Article_AScalableAndEffectiveRoughSetT.pdf","source":{"id":"https://openalex.org/S4306401660","display_name":"Aberystwyth Research portal (Aberystwyth University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I16038530","host_organization_name":"Aberystwyth University","host_organization_lineage":["https://openalex.org/I16038530"],"host_organization_lineage_names":["Aberystwyth University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://inria.hal.science/hal-02880626/file/ChellyDagdia2020_Article_AScalableAndEffectiveRoughSetT.pdf","pdf_url":"https://inria.hal.science/hal-02880626/file/ChellyDagdia2020_Article_AScalableAndEffectiveRoughSetT.pdf","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://hal.inria.fr/hal-02880626/document","pdf_url":"https://hal.inria.fr/hal-02880626/document","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://hal.inria.fr/hal-02880626/file/ChellyDagdia2020_Article_AScalableAndEffectiveRoughSetT.pdf","pdf_url":"https://hal.inria.fr/hal-02880626/file/ChellyDagdia2020_Article_AScalableAndEffectiveRoughSetT.pdf","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10115-020-01467-y","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10115-020-01467-y.pdf","source":{"id":"https://openalex.org/S81770430","display_name":"Knowledge and Information Systems","issn_l":"0219-3116","issn":["0219-3116","0219-1377"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310319900"],"host_organization_lineage_names":["Springer Nature","Springer Science+Business Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[{"funder":"https://openalex.org/F4320338337","funder_display_name":"H2020 Marie Sk\u0142odowska-Curie Actions","award_id":"702527"}],"datasets":[],"versions":[],"referenced_works_count":46,"referenced_works":["https://openalex.org/W1485408073","https://openalex.org/W1513494614","https://openalex.org/W1547566968","https://openalex.org/W1557923305","https://openalex.org/W1593693781","https://openalex.org/W1619226191","https://openalex.org/W1673075472","https://openalex.org/W1870481460","https://openalex.org/W1952835952","https://openalex.org/W1967657094","https://openalex.org/W1977298291","https://openalex.org/W1982861695","https://openalex.org/W2027664395","https://openalex.org/W2029307344","https://openalex.org/W2040263621","https://openalex.org/W2068431618","https://openalex.org/W2074634340","https://openalex.org/W2079680557","https://openalex.org/W2102831150","https://openalex.org/W2110173188","https://openalex.org/W2127097372","https://openalex.org/W2128464104","https://openalex.org/W2133462743","https://openalex.org/W2134691826","https://openalex.org/W2143451122","https://openalex.org/W2154053567","https://openalex.org/W2154185789","https://openalex.org/W2157355837","https://openalex.org/W2157690157","https://openalex.org/W2159128662","https://openalex.org/W2164364358","https://openalex.org/W2196468142","https://openalex.org/W2237307454","https://openalex.org/W2287696922","https://openalex.org/W2316630624","https://openalex.org/W2415186390","https://openalex.org/W2545851563","https://openalex.org/W2592371352","https://openalex.org/W2600796512","https://openalex.org/W2771139966","https://openalex.org/W2783605012","https://openalex.org/W2795686572","https://openalex.org/W3120740533","https://openalex.org/W4246198815","https://openalex.org/W4249247926","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2392963705","https://openalex.org/W2382278777","https://openalex.org/W2379409486","https://openalex.org/W2375932290","https://openalex.org/W2369293046","https://openalex.org/W2353240132","https://openalex.org/W2348430812","https://openalex.org/W2107349454","https://openalex.org/W2005378346","https://openalex.org/W1964260090"],"abstract_inverted_index":{"Abstract":[0],"A":[1,28],"big":[2,195],"challenge":[3,42],"in":[4,35,99],"the":[5,36,57,87,90,94,106,154],"knowledge":[6],"discovery":[7],"process":[8],"is":[9,128],"to":[10,38,64,85,118,165,194],"perform":[11],"data":[12,22,60,91,102,107,120,147,161],"pre-processing,":[13,148],"specifically":[14,149],"feature":[15,71,151,183],"selection,":[16,152],"on":[17],"a":[18,137,177],"large":[19],"amount":[20],"of":[21,30,46,50,96],"and":[23,92,109,139,180],"high":[24],"dimensional":[25],"attribute":[26],"set.":[27],"variety":[29],"techniques":[31,52],"have":[32,168],"been":[33,169],"proposed":[34,174],"literature":[37],"deal":[39],"with":[40,43,163],"this":[41,133],"different":[44],"degrees":[45],"success":[47],"as":[48,126],"most":[49],"these":[51,76],"need":[53,63],"further":[54],"information":[55],"about":[56],"given":[58],"input":[59,101],"for":[61,145,150],"thresholding,":[62],"specify":[65],"noise":[66],"levels":[67],"or":[68],"use":[69],"some":[70],"ranking":[72],"procedures.":[73],"To":[74],"overcome":[75],"limitations,":[77],"rough":[78,141],"set":[79,103,142],"theory":[80],"(RST)":[81],"can":[82],"be":[83],"used":[84],"discover":[86],"dependency":[88],"within":[89],"reduce":[93],"number":[95],"attributes":[97,167],"enclosed":[98],"an":[100],"while":[104],"using":[105],"alone":[108],"requiring":[110],"no":[111],"supplementary":[112],"information.":[113],"However,":[114],"when":[115],"it":[116,127,192],"comes":[117],"massive":[119],"sets,":[121],"RST":[122],"reaches":[123],"its":[124,182],"limits":[125],"highly":[129],"computationally":[130],"expensive.":[131],"In":[132,157],"paper,":[134],"we":[135],"propose":[136],"scalable":[138],"effective":[140],"theory-based":[143],"approach":[144],"large-scale":[146],"under":[153],"Spark":[155],"framework.":[156],"our":[158,173],"detailed":[159],"experiments,":[160],"sets":[162],"up":[164],"10,000":[166],"considered,":[170],"revealing":[171],"that":[172],"solution":[175],"achieves":[176],"good":[178],"speedup":[179],"performs":[181],"selection":[184],"task":[185],"well":[186],"without":[187],"sacrificing":[188],"performance.":[189],"Thus,":[190],"making":[191],"relevant":[193],"data.":[196]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3013319350","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":1}],"updated_date":"2024-12-14T19:20:26.885046","created_date":"2020-04-03"}