{"id":"https://openalex.org/W4308883303","doi":"https://doi.org/10.1145/3568027","title":"Efficient Sorting, Duplicate Removal, Grouping, and Aggregation","display_name":"Efficient Sorting, Duplicate Removal, Grouping, and Aggregation","publication_year":2022,"publication_date":"2022-11-10","ids":{"openalex":"https://openalex.org/W4308883303","doi":"https://doi.org/10.1145/3568027"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3568027","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3568027","source":{"id":"https://openalex.org/S90119964","display_name":"ACM Transactions on Database Systems","issn_l":"0362-5915","issn":["0362-5915","1557-4644"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3568027","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101511221","display_name":"Thanh Do","orcid":"https://orcid.org/0000-0001-9893-5725"},"institutions":[{"id":"https://openalex.org/I4210098601","display_name":"Celsion (United States)","ror":"https://ror.org/00tzssk35","country_code":"US","type":"company","lineage":["https://openalex.org/I4210098601"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Thanh Do","raw_affiliation_strings":["Celonis Inc., New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Celonis Inc., New York, NY, USA","institution_ids":["https://openalex.org/I4210098601"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041862671","display_name":"Goetz Graefe","orcid":"https://orcid.org/0000-0003-0194-6466"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"funder","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Goetz Graefe","raw_affiliation_strings":["Google Inc., Madison, WI, USA"],"affiliations":[{"raw_affiliation_string":"Google Inc., Madison, WI, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017678555","display_name":"Jeffrey F. Naughton","orcid":"https://orcid.org/0000-0002-3710-8096"},"institutions":[{"id":"https://openalex.org/I4210098601","display_name":"Celsion (United States)","ror":"https://ror.org/00tzssk35","country_code":"US","type":"company","lineage":["https://openalex.org/I4210098601"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jeffrey Naughton","raw_affiliation_strings":["Celonis Inc., New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Celonis Inc., New York, NY, USA","institution_ids":["https://openalex.org/I4210098601"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.38,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.816641,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":90},"biblio":{"volume":"47","issue":"4","first_page":"1","last_page":"35"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9962,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/merge","display_name":"Merge (version control)","score":0.54333586},{"id":"https://openalex.org/keywords/merge-algorithm","display_name":"Merge algorithm","score":0.50020623}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8808746},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.6664455},{"id":"https://openalex.org/C88548561","wikidata":"https://www.wikidata.org/wiki/Q347599","display_name":"sort","level":2,"score":0.6269924},{"id":"https://openalex.org/C108094655","wikidata":"https://www.wikidata.org/wiki/Q181593","display_name":"Sorting algorithm","level":3,"score":0.5976386},{"id":"https://openalex.org/C111696304","wikidata":"https://www.wikidata.org/wiki/Q2303697","display_name":"Sorting","level":2,"score":0.5845151},{"id":"https://openalex.org/C197129107","wikidata":"https://www.wikidata.org/wiki/Q1921621","display_name":"Merge (version control)","level":2,"score":0.54333586},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5270988},{"id":"https://openalex.org/C140086265","wikidata":"https://www.wikidata.org/wiki/Q11341754","display_name":"Merge algorithm","level":4,"score":0.50020623},{"id":"https://openalex.org/C67388219","wikidata":"https://www.wikidata.org/wiki/Q207440","display_name":"Hash table","level":3,"score":0.47171658},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.43132827},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34460258},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.22221038},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.19782388},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3568027","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3568027","source":{"id":"https://openalex.org/S90119964","display_name":"ACM Transactions on Database Systems","issn_l":"0362-5915","issn":["0362-5915","1557-4644"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2010.00152","pdf_url":"https://arxiv.org/pdf/2010.00152","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3568027","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3568027","source":{"id":"https://openalex.org/S90119964","display_name":"ACM Transactions on Database Systems","issn_l":"0362-5915","issn":["0362-5915","1557-4644"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":24,"referenced_works":["https://openalex.org/W1605782097","https://openalex.org/W1972754280","https://openalex.org/W1974796034","https://openalex.org/W1981420413","https://openalex.org/W2020191321","https://openalex.org/W2058954251","https://openalex.org/W2063259549","https://openalex.org/W2067089961","https://openalex.org/W2068739275","https://openalex.org/W2106887953","https://openalex.org/W2151203234","https://openalex.org/W2153329411","https://openalex.org/W2158237121","https://openalex.org/W2169486917","https://openalex.org/W2173213060","https://openalex.org/W2326587081","https://openalex.org/W2474900888","https://openalex.org/W2901608006","https://openalex.org/W3023647491","https://openalex.org/W3137759927","https://openalex.org/W3197796305","https://openalex.org/W4249653116","https://openalex.org/W4299415997","https://openalex.org/W4300868310"],"related_works":["https://openalex.org/W4389428824","https://openalex.org/W4320925713","https://openalex.org/W3014215895","https://openalex.org/W2925598649","https://openalex.org/W2595864772","https://openalex.org/W2511099490","https://openalex.org/W2190513794","https://openalex.org/W2140238930","https://openalex.org/W1997715509","https://openalex.org/W19461966"],"abstract_inverted_index":{"Database":[0],"query":[1,48,144],"processing":[2],"requires":[3,22],"algorithms":[4,12],"for":[5,73,82,120,134,159,192],"duplicate":[6,161],"removal,":[7,162],"grouping,":[8,163],"and":[9,32,67,70,99,109,113,164,179],"aggregation.":[10,165],"Three":[11],"exist:":[13],"in-stream":[14],"aggregation":[15,26,34,79,107,190],"is":[16,80,138],"most":[17],"efficient":[18],"by":[19],"far":[20],"but":[21],"sorted":[23,74,119,205],"input;":[24],"sort-based":[25,160,181],"relies":[27,35],"on":[28,36,56],"external":[29],"merge":[30,127],"sort;":[31],"hash":[33,39,42],"an":[37],"in-memory":[38],"table":[40],"plus":[41],"partitioning":[43],"to":[44,53,117,147],"temporary":[45],"storage.":[46],"Cost-based":[47],"optimization":[49],"chooses":[50],"which":[51],"algorithm":[52,149,158,168,191,198,203,219],"use":[54],"based":[55],"several":[57],"factors,":[58],"including":[59],"the":[60,64,71,86,96,114,130,201,217],"sort":[61],"order":[62],"of":[63,92,226],"input,":[65],"input":[66,98,108],"output":[68,83,110,115,206],"sizes,":[69],"need":[72],"output.":[75],"For":[76],"example,":[77],"hash-based":[78,178],"ideal":[81],"smaller":[84],"than":[85],"available":[87],"memory":[88],"(e.g.,":[89],"Query":[90,215],"1":[91],"TPC-H),":[93],"whereas":[94],"sorting":[95,102],"entire":[97],"aggregating":[100],"after":[101],"are":[103,111],"preferable":[104],"when":[105],"both":[106,176],"large":[112],"needs":[116],"be":[118],"a":[121,126,135,156,187],"subsequent":[122,211],"operation":[123],"such":[124],"as":[125,173,175,186],"join.":[128],"Unfortunately,":[129],"size":[131],"information":[132],"required":[133],"sound":[136],"choice":[137],"often":[139],"inaccurate":[140],"or":[141],"unavailable":[142],"during":[143],"optimization,":[145],"leading":[146],"sub-optimal":[148],"choices.":[150,199],"In":[151],"response,":[152],"this":[153],"article":[154],"introduces":[155],"new":[157,167,202,218],"The":[166],"always":[169],"performs":[170],"at":[171],"least":[172],"well":[174],"traditional":[177,180],"algorithms.":[182],"It":[183],"can":[184,208],"serve":[185],"system\u2019s":[188],"only":[189],"unsorted":[193],"inputs,":[194],"thus":[195],"preventing":[196],"erroneous":[197],"Furthermore,":[200],"produces":[204],"that":[207,223],"speed":[209],"up":[210],"operations.":[212],"Google\u2019s":[213],"F1":[214],"uses":[216],"in":[220],"production":[221],"workloads":[222],"aggregate":[224],"petabytes":[225],"data":[227],"every":[228],"day.":[229]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4308883303","counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3}],"updated_date":"2025-03-30T08:02:00.612281","created_date":"2022-11-18"}