{"id":"https://openalex.org/W2912720401","doi":"https://doi.org/10.1109/bigdata.2018.8622024","title":"A Distributed Rough Set Theory Algorithm based on Locality Sensitive Hashing for an Efficient Big Data Pre-processing","display_name":"A Distributed Rough Set Theory Algorithm based on Locality Sensitive Hashing for an Efficient Big Data Pre-processing","publication_year":2018,"publication_date":"2018-12-01","ids":{"openalex":"https://openalex.org/W2912720401","doi":"https://doi.org/10.1109/bigdata.2018.8622024","mag":"2912720401"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2018.8622024","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://research.aber.ac.uk/files/29036588/PID5673625.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018448962","display_name":"Zaineb Chelly Dagdia","orcid":"https://orcid.org/0000-0002-2551-6586"},"institutions":[{"id":"https://openalex.org/I4210140729","display_name":"Institut Sup\u00e9rieur de Gestion de Tunis","ror":"https://ror.org/04w9mdw91","country_code":"TN","type":"education","lineage":["https://openalex.org/I108714496","https://openalex.org/I4210140729"]}],"countries":["TN"],"is_corresponding":false,"raw_author_name":"Zaineb Chelly Dagdia","raw_affiliation_strings":["LARODEC, Institut Sup\u00e9rieur de Gestion de Tunis, Tunis, Tunisia"],"affiliations":[{"raw_affiliation_string":"LARODEC, Institut Sup\u00e9rieur de Gestion de Tunis, Tunis, Tunisia","institution_ids":["https://openalex.org/I4210140729"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083499604","display_name":"Christine Zarges","orcid":"https://orcid.org/0000-0002-2829-4296"},"institutions":[{"id":"https://openalex.org/I16038530","display_name":"Aberystwyth University","ror":"https://ror.org/015m2p889","country_code":"GB","type":"education","lineage":["https://openalex.org/I16038530"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Christine Zarges","raw_affiliation_strings":["Department of Computer Science, Aberystywth University, Aberystwyth, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Aberystywth University, Aberystwyth, United Kingdom","institution_ids":["https://openalex.org/I16038530"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001516070","display_name":"Ga\u00ebl Beck","orcid":"https://orcid.org/0000-0002-5228-2666"},"institutions":[{"id":"https://openalex.org/I4210156583","display_name":"Laboratoire d'Informatique de Paris-Nord","ror":"https://ror.org/05g1zjw44","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I4210091279","https://openalex.org/I4210156583","https://openalex.org/I4210159245"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Gael Beck","raw_affiliation_strings":["Computer Science Laboratory (LIPN), University Paris-North - 13, Villetaneuse, France"],"affiliations":[{"raw_affiliation_string":"Computer Science Laboratory (LIPN), University Paris-North - 13, Villetaneuse, France","institution_ids":["https://openalex.org/I4210156583"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113698150","display_name":"Hanene Azzag","orcid":null},"institutions":[{"id":"https://openalex.org/I4210156583","display_name":"Laboratoire d'Informatique de Paris-Nord","ror":"https://ror.org/05g1zjw44","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I4210091279","https://openalex.org/I4210156583","https://openalex.org/I4210159245"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Hanene Azzag","raw_affiliation_strings":["Computer Science Laboratory (LIPN), University Paris-North - 13, Villetaneuse, France"],"affiliations":[{"raw_affiliation_string":"Computer Science Laboratory (LIPN), University Paris-North - 13, Villetaneuse, France","institution_ids":["https://openalex.org/I4210156583"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023422312","display_name":"Mustapha Lebbah","orcid":"https://orcid.org/0000-0001-7245-6371"},"institutions":[{"id":"https://openalex.org/I4210156583","display_name":"Laboratoire d'Informatique de Paris-Nord","ror":"https://ror.org/05g1zjw44","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I4210091279","https://openalex.org/I4210156583","https://openalex.org/I4210159245"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Mustapha Lebbah","raw_affiliation_strings":["Computer Science Laboratory (LIPN), University Paris-North - 13, Villetaneuse, France"],"affiliations":[{"raw_affiliation_string":"Computer Science Laboratory (LIPN), University Paris-North - 13, Villetaneuse, France","institution_ids":["https://openalex.org/I4210156583"]}]}],"institution_assertions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.423,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":9,"citation_normalized_percentile":{"value":0.586224,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":84,"max":85},"biblio":{"volume":null,"issue":null,"first_page":"2597","last_page":"2606"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9957,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9927,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/locality-sensitive-hashing","display_name":"Locality-sensitive hashing","score":0.7181969},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.44843996},{"id":"https://openalex.org/keywords/distributed-database","display_name":"Distributed database","score":0.4465911}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8151371},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.73831284},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7209183},{"id":"https://openalex.org/C74270461","wikidata":"https://www.wikidata.org/wiki/Q1625299","display_name":"Locality-sensitive hashing","level":4,"score":0.7181969},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.58625525},{"id":"https://openalex.org/C111012933","wikidata":"https://www.wikidata.org/wiki/Q3137210","display_name":"Rough set","level":2,"score":0.5431683},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5274912},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.49418357},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.47323215},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.44843996},{"id":"https://openalex.org/C70061542","wikidata":"https://www.wikidata.org/wiki/Q989016","display_name":"Distributed database","level":2,"score":0.4465911},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.42800272},{"id":"https://openalex.org/C67388219","wikidata":"https://www.wikidata.org/wiki/Q207440","display_name":"Hash table","level":3,"score":0.36061394},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3514856},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.34223205},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.258861},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.19218853},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2018.8622024","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://pure.aber.ac.uk/portal/en/publications/a-distributed-rough-set-theory-algorithm-based-on-locality-sensitive-hashing-for-an-efficient-big-data-preprocessing(5089b870-8a06-4309-b805-1237969acb85).html","pdf_url":"https://research.aber.ac.uk/files/29036588/PID5673625.pdf","source":{"id":"https://openalex.org/S4306401660","display_name":"Aberystwyth Research portal (Aberystwyth University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I16038530","host_organization_name":"Aberystwyth University","host_organization_lineage":["https://openalex.org/I16038530"],"host_organization_lineage_names":["Aberystwyth University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://pure.aber.ac.uk/ws/files/29036588/PID5673625.pdf","pdf_url":"https://pure.aber.ac.uk/ws/files/29036588/PID5673625.pdf","source":{"id":"https://openalex.org/S4306401660","display_name":"Aberystwyth Research portal (Aberystwyth University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I16038530","host_organization_name":"Aberystwyth University","host_organization_lineage":["https://openalex.org/I16038530"],"host_organization_lineage_names":["Aberystwyth University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://pure.aber.ac.uk/portal/en/publications/a-distributed-rough-set-theory-algorithm-based-on-locality-sensitive-hashing-for-an-efficient-big-data-preprocessing(5089b870-8a06-4309-b805-1237969acb85).html","pdf_url":"https://research.aber.ac.uk/files/29036588/PID5673625.pdf","source":{"id":"https://openalex.org/S4306401660","display_name":"Aberystwyth Research portal (Aberystwyth University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I16038530","host_organization_name":"Aberystwyth University","host_organization_lineage":["https://openalex.org/I16038530"],"host_organization_lineage_names":["Aberystwyth University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":24,"referenced_works":["https://openalex.org/W1502916507","https://openalex.org/W1513494614","https://openalex.org/W1547566968","https://openalex.org/W1870428314","https://openalex.org/W1952835952","https://openalex.org/W1982193706","https://openalex.org/W1987040325","https://openalex.org/W1992371516","https://openalex.org/W2012833704","https://openalex.org/W2029307344","https://openalex.org/W2127097372","https://openalex.org/W2132069633","https://openalex.org/W2143451122","https://openalex.org/W2147717514","https://openalex.org/W2154053567","https://openalex.org/W2159128662","https://openalex.org/W2162006472","https://openalex.org/W2562786159","https://openalex.org/W2783605012","https://openalex.org/W3120740533","https://openalex.org/W4230940751","https://openalex.org/W4236642514","https://openalex.org/W4244546497","https://openalex.org/W4249894105"],"related_works":["https://openalex.org/W2975588143","https://openalex.org/W2754607325","https://openalex.org/W2752074276","https://openalex.org/W2147226516","https://openalex.org/W2144265691","https://openalex.org/W2135779989","https://openalex.org/W2120031510","https://openalex.org/W2080135560","https://openalex.org/W2010970209","https://openalex.org/W144856782"],"abstract_inverted_index":{"A":[0],"big":[1,11,49,100,173],"challenge":[2],"in":[3,72,128,192,201],"the":[4,31,65,68,73,111,116,123,126,137,150,183,186,202],"knowledge":[5],"discovery":[6],"process":[7],"is":[8,64,143,157],"to":[9,39,48,106,121,136,172],"perform":[10],"data":[12,78,101,174,199],"pre-processing;":[13],"specifically":[14],"feature":[15,42,69,166,189],"selection.":[16,43],"To":[17,44],"handle":[18],"this":[19,81],"challenge,":[20],"Rough":[21],"Set":[22],"Theory":[23],"(RST)":[24],"has":[25,37],"been":[26],"considered":[27],"as":[28,35],"one":[29,59],"of":[30,54,60,67,89,125,149,185],"most":[32],"powerful":[33],"techniques":[34],"it":[36,169],"much":[38],"offer":[40],"for":[41,99,165],"extend":[45],"its":[46,61],"applicability":[47],"data,":[50],"a":[51,85,129,146,193,207],"distributed":[52,74,87,139,203],"version":[53,88],"RST":[55,90,140],"was":[56],"developed.":[57],"However,":[58],"key":[62],"challenges":[63],"partitioning":[66,148,184],"search":[70,190],"space":[71,191],"environment":[75],"while":[76],"guaranteeing":[77],"dependency.":[79],"In":[80],"paper,":[82],"we":[83],"propose":[84],"new":[86],"based":[91,144],"on":[92,145],"Locality":[93],"Sensitive":[94],"Hashing":[95],"(LSH),":[96],"named":[97],"LSH-dRST,":[98],"pre-processing.":[102,175],"LSH-dRST":[103,135,156,181],"uses":[104],"LSH":[105],"match":[107],"similar":[108],"features":[109],"into":[110,119],"same":[112],"bucket":[113],"and":[114,152,205],"maps":[115],"generated":[117],"buckets":[118],"partitions":[120],"enable":[122],"splitting":[124],"universe":[127,151],"more":[130,163,170,194],"appropriate":[131],"way.":[132,196],"We":[133,176],"compare":[134],"standard":[138],"technique":[141],"which":[142],"random":[147],"demonstrate":[153,178],"that":[154,179],"our":[155,180],"not":[158],"only":[159],"scalable":[160],"but":[161],"also":[162,177],"reliable":[164,195],"selection;":[167],"making":[168],"relevant":[171],"ensures":[182,206],"high":[187],"dimensional":[188],"Hence,":[197],"guarantees":[198],"dependency":[200],"environment,":[204],"lower":[208],"computational":[209],"cost.":[210]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2912720401","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2019,"cited_by_count":2}],"updated_date":"2025-01-16T20:30:42.807474","created_date":"2019-02-21"}