{"id":"https://openalex.org/W3106272338","doi":"https://doi.org/10.1145/3292500.3330825","title":"A Memory-Efficient Sketch Method for Estimating High Similarities in Streaming Sets","display_name":"A Memory-Efficient Sketch Method for Estimating High Similarities in Streaming Sets","publication_year":2019,"publication_date":"2019-07-25","ids":{"openalex":"https://openalex.org/W3106272338","doi":"https://doi.org/10.1145/3292500.3330825","mag":"3106272338"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3292500.3330825","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"proceedings-article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1905.08977","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022240408","display_name":"Pinghui Wang","orcid":"https://orcid.org/0000-0001-5779-6108"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pinghui Wang","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056609151","display_name":"Yiyan Qi","orcid":"https://orcid.org/0000-0002-8078-5834"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiyan Qi","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100710888","display_name":"Yuanming Zhang","orcid":"https://orcid.org/0000-0002-1191-4153"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanming Zhang","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032033256","display_name":"Qiaozhu Zhai","orcid":"https://orcid.org/0000-0002-7312-4923"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiaozhu Zhai","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100455048","display_name":"Chenxu Wang","orcid":"https://orcid.org/0000-0002-9539-5046"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenxu Wang","raw_affiliation_strings":["Xi'an Jiaotong University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Shenzhen, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068489266","display_name":"John C. S. Lui","orcid":"https://orcid.org/0000-0001-7466-0384"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"John C.S. Lui","raw_affiliation_strings":["The Chinese University of Hong Kong, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075845093","display_name":"Xiaohong Guan","orcid":"https://orcid.org/0000-0002-8826-0362"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaohong Guan","raw_affiliation_strings":["Xi'an Jiaotong University & Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University & Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I87445476","https://openalex.org/I99065089"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":33,"citation_normalized_percentile":{"value":0.999829,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":95},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9948,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9945,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/jaccard-index","display_name":"Jaccard index","score":0.9533515},{"id":"https://openalex.org/keywords/sketch","display_name":"Sketch","score":0.76008713},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6088675},{"id":"https://openalex.org/keywords/cardinality","display_name":"Cardinality (data modeling)","score":0.598518}],"concepts":[{"id":"https://openalex.org/C203519979","wikidata":"https://www.wikidata.org/wiki/Q865360","display_name":"Jaccard index","level":3,"score":0.9533515},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.76354504},{"id":"https://openalex.org/C2779231336","wikidata":"https://www.wikidata.org/wiki/Q7534724","display_name":"Sketch","level":2,"score":0.76008713},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6088675},{"id":"https://openalex.org/C87117476","wikidata":"https://www.wikidata.org/wiki/Q362383","display_name":"Cardinality (data modeling)","level":2,"score":0.598518},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5284909},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.41101024},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3955507},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.32287574},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3132829},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2222704},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3292500.3330825","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/1905.08977","pdf_url":"https://arxiv.org/pdf/1905.08977","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.1905.08977","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/1905.08977","pdf_url":"https://arxiv.org/pdf/1905.08977","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"display_name":"Quality education","id":"https://metadata.un.org/sdg/4","score":0.54}],"grants":[],"datasets":[],"versions":["https://openalex.org/W2944916577","https://openalex.org/W3106272338"],"referenced_works_count":41,"referenced_works":["https://openalex.org/W107173025","https://openalex.org/W1506430804","https://openalex.org/W1583707981","https://openalex.org/W1999092742","https://openalex.org/W2005731313","https://openalex.org/W2008365755","https://openalex.org/W2012833704","https://openalex.org/W2025051251","https://openalex.org/W2029852131","https://openalex.org/W2048346983","https://openalex.org/W2048779798","https://openalex.org/W2053377618","https://openalex.org/W2060170830","https://openalex.org/W2064379477","https://openalex.org/W2081193615","https://openalex.org/W2099480861","https://openalex.org/W2102221597","https://openalex.org/W2120031510","https://openalex.org/W2123845384","https://openalex.org/W2126907894","https://openalex.org/W2132069633","https://openalex.org/W2140129471","https://openalex.org/W2140431670","https://openalex.org/W2144982963","https://openalex.org/W2147017814","https://openalex.org/W2152228468","https://openalex.org/W2162006472","https://openalex.org/W2171013708","https://openalex.org/W2418258478","https://openalex.org/W2555576367","https://openalex.org/W2583214467","https://openalex.org/W2604101616","https://openalex.org/W2604248105","https://openalex.org/W2762566515","https://openalex.org/W2765866471","https://openalex.org/W2772632044","https://openalex.org/W2785764160","https://openalex.org/W2891345706","https://openalex.org/W2963436558","https://openalex.org/W3102219154","https://openalex.org/W4230940751"],"related_works":["https://openalex.org/W4366711670","https://openalex.org/W4206503171","https://openalex.org/W2972216353","https://openalex.org/W2950817225","https://openalex.org/W2945869148","https://openalex.org/W2398781203","https://openalex.org/W2091133150","https://openalex.org/W2012019886","https://openalex.org/W2009279505","https://openalex.org/W1994775821"],"abstract_inverted_index":{"Estimating":[0],"set":[1],"similarity":[2,29,42,174],"and":[3,18,32,44,54,99,109,188,217,238],"detecting":[4],"highly":[5],"similar":[6],"sets":[7,31,84],"are":[8,93],"fundamental":[9],"problems":[10],"in":[11,95,135],"areas":[12],"such":[13,40],"as":[14,41,85,87],"databases,":[15],"machine":[16],"learning,":[17],"information":[19],"retrieval.":[20],"MinHash":[21,53,66,78,108,233],"is":[22,101,225],"a":[23,96,124,157,166,202,213],"well-known":[24],"technique":[25],"for":[26,37,69,160,171,183,201,205,241],"approximating":[27],"Jaccard":[28,133,173],"of":[30,63,90,150,198,215],"has":[33],"been":[34],"successfully":[35],"used":[36,200],"many":[38],"applications":[39],"search":[43],"large":[45],"scale":[46],"learning.":[47],"Its":[48],"two":[49],"compressed":[50],"versions,":[51],"b-bit":[52,107],"Odd":[55,110],"Sketch,":[56],"can":[57,79],"significantly":[58],"reduce":[59],"the":[60,64,185,190,196,206,235],"memory":[61,125,193,230],"usage":[62,194],"original":[65],"method,":[67,128],"especially":[68],"estimating":[70,242],"high":[71,243],"similarities":[72,74,134],"(i.e.,":[73,195],"around":[75],"1).":[76],"Although":[77],"be":[80],"applied":[81],"to":[82,113,130,139,155],"static":[83],"well":[86],"streaming":[88,97,116,136],"sets,":[89],"which":[91],"elements":[92],"given":[94],"fashion":[98],"cardinality":[100],"unknown":[102],"or":[103],"even":[104],"infinite,":[105],"unfortunately,":[106],"Sketch":[111],"fail":[112],"deal":[114],"with":[115,234],"data.":[117],"To":[118],"solve":[119],"this":[120],"problem,":[121],"we":[122,180],"design":[123],"efficient":[126,231],"sketch":[127,159],"MaxLogHash,":[129],"accurately":[131],"estimate":[132],"sets.":[137],"Compared":[138],"MinHash,":[140],"our":[141,222],"method":[142,223],"uses":[143],"smaller":[144],"sized":[145],"registers":[146,199],"(each":[147],"register":[148],"consists":[149],"less":[151],"than":[152,232],"7":[153],"bits)":[154],"build":[156],"compact":[158],"each":[161],"set.":[162],"We":[163,209],"also":[164],"provide":[165],"simple":[167],"yet":[168],"accurate":[169],"estimator":[170],"inferring":[172],"from":[175],"MaxLogHash":[176,203,224],"sketches.":[177],"In":[178],"addition,":[179],"derive":[181],"formulas":[182],"bounding":[184],"estimation":[186],"error":[187],"determine":[189],"smallest":[191],"necessary":[192],"number":[197],"sketch)":[204],"desired":[207],"accuracy.":[208],"conduct":[210],"experiments":[211],"on":[212],"variety":[214],"datasets,":[216],"experimental":[218],"results":[219],"show":[220],"that":[221],"about":[226],"5":[227],"times":[228],"more":[229],"same":[236],"accuracy":[237],"computational":[239],"cost":[240],"similarities.":[244]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3106272338","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":1}],"updated_date":"2024-12-14T15:33:30.001188","created_date":"2020-11-23"}