{"id":"https://openalex.org/W2163794926","doi":"https://doi.org/10.1109/icde.2009.113","title":"Similarity Group-By","display_name":"Similarity Group-By","publication_year":2009,"publication_date":"2009-03-01","ids":{"openalex":"https://openalex.org/W2163794926","doi":"https://doi.org/10.1109/icde.2009.113","mag":"2163794926"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde.2009.113","pdf_url":null,"source":{"id":"https://openalex.org/S4210210321","display_name":"Proceedings - International Conference on Data Engineering","issn_l":"1084-4627","issn":["1084-4627","2375-0286"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088912009","display_name":"Yasin N. Silva","orcid":"https://orcid.org/0000-0003-1852-1683"},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yasin N. Silva","raw_affiliation_strings":["Department of Computer Science, Purdue University, IN, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Purdue University, IN, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000123743","display_name":"Walid G. Aref","orcid":"https://orcid.org/0000-0001-8169-7775"},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Walid G. Aref","raw_affiliation_strings":["Department of Computer Science, Purdue University, IN, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Purdue University, IN, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103350899","display_name":"Mohamed H. Ali","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mohamed H. Ali","raw_affiliation_strings":["Microsoft Corporation, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.808,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":33,"citation_normalized_percentile":{"value":0.917017,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":92,"max":93},"biblio":{"volume":null,"issue":null,"first_page":"904","last_page":"915"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9963,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.68961906},{"id":"https://openalex.org/keywords/online-analytical-processing","display_name":"Online analytical processing","score":0.6049431},{"id":"https://openalex.org/keywords/operator","display_name":"Operator (biology)","score":0.4777997}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.74754757},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.68961906},{"id":"https://openalex.org/C201932085","wikidata":"https://www.wikidata.org/wiki/Q642514","display_name":"Online analytical processing","level":3,"score":0.6049431},{"id":"https://openalex.org/C2781311116","wikidata":"https://www.wikidata.org/wiki/Q83306","display_name":"Group (periodic table)","level":2,"score":0.5970663},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5897946},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5893893},{"id":"https://openalex.org/C510870499","wikidata":"https://www.wikidata.org/wiki/Q47607","display_name":"SQL","level":2,"score":0.5796277},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.51951754},{"id":"https://openalex.org/C17020691","wikidata":"https://www.wikidata.org/wiki/Q139677","display_name":"Operator (biology)","level":5,"score":0.4777997},{"id":"https://openalex.org/C191087605","wikidata":"https://www.wikidata.org/wiki/Q1501395","display_name":"Online transaction processing","level":4,"score":0.43484962},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3811008},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.34920692},{"id":"https://openalex.org/C75949130","wikidata":"https://www.wikidata.org/wiki/Q848010","display_name":"Database transaction","level":2,"score":0.21162757},{"id":"https://openalex.org/C135572916","wikidata":"https://www.wikidata.org/wiki/Q193351","display_name":"Data warehouse","level":2,"score":0.1999205},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.14174947},{"id":"https://openalex.org/C72108876","wikidata":"https://www.wikidata.org/wiki/Q844565","display_name":"Transaction processing","level":3,"score":0.1375243},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C158448853","wikidata":"https://www.wikidata.org/wiki/Q425218","display_name":"Repressor","level":4,"score":0.0},{"id":"https://openalex.org/C86339819","wikidata":"https://www.wikidata.org/wiki/Q407384","display_name":"Transcription factor","level":3,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde.2009.113","pdf_url":null,"source":{"id":"https://openalex.org/S4210210321","display_name":"Proceedings - International Conference on Data Engineering","issn_l":"1084-4627","issn":["1084-4627","2375-0286"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.73,"display_name":"Peace, justice, and strong institutions","id":"https://metadata.un.org/sdg/16"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":34,"referenced_works":["https://openalex.org/W105107704","https://openalex.org/W1501500081","https://openalex.org/W1516185881","https://openalex.org/W1561514023","https://openalex.org/W1597610884","https://openalex.org/W1761301028","https://openalex.org/W1976219590","https://openalex.org/W1992419399","https://openalex.org/W1996510517","https://openalex.org/W2004420255","https://openalex.org/W2006685053","https://openalex.org/W2016854254","https://openalex.org/W2029038562","https://openalex.org/W2043793719","https://openalex.org/W2058458206","https://openalex.org/W2074836925","https://openalex.org/W2075827270","https://openalex.org/W2087619740","https://openalex.org/W2094951520","https://openalex.org/W2095897464","https://openalex.org/W2100344784","https://openalex.org/W2123060058","https://openalex.org/W2124645273","https://openalex.org/W2142669732","https://openalex.org/W2145061307","https://openalex.org/W2171492933","https://openalex.org/W2978337448","https://openalex.org/W3002980296","https://openalex.org/W3023740622","https://openalex.org/W4231029117","https://openalex.org/W4239184613","https://openalex.org/W4285719527","https://openalex.org/W8793079","https://openalex.org/W96772584"],"related_works":["https://openalex.org/W834810655","https://openalex.org/W2759374221","https://openalex.org/W2612731889","https://openalex.org/W2400366270","https://openalex.org/W2188975716","https://openalex.org/W2102729946","https://openalex.org/W2084383859","https://openalex.org/W2067405892","https://openalex.org/W1555950237","https://openalex.org/W132467702"],"abstract_inverted_index":{"Group-by":[0],"is":[1,7,22,47,55,65],"a":[2,37,49,56,110],"core":[3],"database":[4],"operation":[5,117],"that":[6,41,64,102,155],"used":[8],"extensively":[9],"in":[10,171],"OLTP,":[11],"OLAP,":[12],"and":[13,58,71,118],"decision":[14],"support":[15],"systems.":[16],"In":[17,32,82],"many":[18,104],"application":[19,105],"scenarios,":[20],"it":[21],"required":[23],"to":[24,77,84,139],"group":[25],"similar":[26],"but":[27,53],"not":[28,48],"necessarily":[29],"equal":[30],"values.":[31],"this":[33,123],"paper":[34,108,126],"we":[35],"propose":[36],"new":[38,50],"SQL":[39,69],"construct":[40],"supports":[42],"similarity-based":[43,79,158],"group-by":[44,91,116,135,159],"(SGB).":[45],"SGB":[46],"clustering":[51,86],"algorithm,":[52],"rather":[54],"practical":[57],"fast":[59],"similarity":[60,90,115],"grouping":[61],"query":[62],"operator":[63,92],"compatible":[66],"with":[67,75,165],"other":[68],"operators":[70,146,160],"can":[72,136],"be":[73,137],"combined":[74],"them":[76],"answer":[78],"queries":[80],"efficiently.":[81],"contrast":[83],"expensive":[85],"algorithms,":[87],"the":[88,114,133,140,156,175],"proposed":[89,145,157],"maintains":[93],"low":[94],"execution":[95,172],"times":[96],"while":[97],"still":[98],"generating":[99],"meaningful":[100],"groupings":[101],"address":[103],"needs.":[106],"The":[107,125,144,151],"presents":[109],"general":[111],"definition":[112],"of":[113,122,142],"gives":[119],"three":[120],"instances":[121],"definition.":[124],"also":[127],"discusses":[128],"how":[129],"optimization":[130],"techniques":[131],"for":[132],"regular":[134,176],"extended":[138],"case":[141],"SGB.":[143],"are":[147],"implemented":[148],"inside":[149],"PostgreSQL.":[150],"performance":[152],"study":[153],"shows":[154],"have":[161],"good":[162],"scalability":[163],"properties":[164],"at":[166],"most":[167],"only":[168],"25%":[169],"increase":[170],"time":[173],"over":[174],"group-by.":[177]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2163794926","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":8}],"updated_date":"2025-01-09T07:19:11.955674","created_date":"2016-06-24"}