{"id":"https://openalex.org/W4306659853","doi":"https://doi.org/10.32614/rj-2022-038","title":"reclin2: a Toolkit for Record Linkage and Deduplication","display_name":"reclin2: a Toolkit for Record Linkage and Deduplication","publication_year":2022,"publication_date":"2022-10-10","ids":{"openalex":"https://openalex.org/W4306659853","doi":"https://doi.org/10.32614/rj-2022-038"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.32614/rj-2022-038","pdf_url":"https://journal.r-project.org/articles/RJ-2022-038/RJ-2022-038.pdf","source":{"id":"https://openalex.org/S2489169438","display_name":"The R Journal","issn_l":"2073-4859","issn":["2073-4859"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journal.r-project.org/articles/RJ-2022-038/RJ-2022-038.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110538197","display_name":"D.J. van der Laan","orcid":null},"institutions":[{"id":"https://openalex.org/I1322731696","display_name":"Centraal Bureau voor de Statistiek","ror":"https://ror.org/0408v4c28","country_code":"NL","type":"other","lineage":["https://openalex.org/I1322731696"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"D. Jan van der Laan","raw_affiliation_strings":["Statistics Netherlands (CBS)"],"affiliations":[{"raw_affiliation_string":"Statistics Netherlands (CBS)","institution_ids":["https://openalex.org/I1322731696"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5110538197"],"corresponding_institution_ids":["https://openalex.org/I1322731696"],"apc_list":null,"apc_paid":null,"fwci":0.204,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.470279,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":60,"max":70},"biblio":{"volume":"14","issue":"2","first_page":"325","last_page":"333"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.999,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.999,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9397,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.906,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.87820387},{"id":"https://openalex.org/keywords/linkage","display_name":"Linkage (software)","score":0.7252885},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7005676},{"id":"https://openalex.org/keywords/modularity","display_name":"Modularity","score":0.6156578},{"id":"https://openalex.org/keywords/record-linkage","display_name":"Record Linkage","score":0.49784327}],"concepts":[{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.87820387},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.81194586},{"id":"https://openalex.org/C31266012","wikidata":"https://www.wikidata.org/wiki/Q6554340","display_name":"Linkage (software)","level":3,"score":0.7252885},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.71939075},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7005676},{"id":"https://openalex.org/C2779478453","wikidata":"https://www.wikidata.org/wiki/Q6889748","display_name":"Modularity (biology)","level":2,"score":0.6156578},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.51509494},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5015762},{"id":"https://openalex.org/C142210648","wikidata":"https://www.wikidata.org/wiki/Q1266546","display_name":"Record linkage","level":3,"score":0.49784327},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.48363388},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.42606413},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3332368},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.19356433},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.13311931},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.32614/rj-2022-038","pdf_url":"https://journal.r-project.org/articles/RJ-2022-038/RJ-2022-038.pdf","source":{"id":"https://openalex.org/S2489169438","display_name":"The R Journal","issn_l":"2073-4859","issn":["2073-4859"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.32614/rj-2022-038","pdf_url":"https://journal.r-project.org/articles/RJ-2022-038/RJ-2022-038.pdf","source":{"id":"https://openalex.org/S2489169438","display_name":"The R Journal","issn_l":"2073-4859","issn":["2073-4859"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.41,"id":"https://metadata.un.org/sdg/8"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":12,"referenced_works":["https://openalex.org/W1547612978","https://openalex.org/W1673955757","https://openalex.org/W1990028388","https://openalex.org/W2073471108","https://openalex.org/W2150871235","https://openalex.org/W2793422535","https://openalex.org/W2908287046","https://openalex.org/W292211760","https://openalex.org/W4399522161","https://openalex.org/W4399570338","https://openalex.org/W4399583987","https://openalex.org/W4399650795"],"related_works":["https://openalex.org/W3088855600","https://openalex.org/W2989796854","https://openalex.org/W2808916796","https://openalex.org/W2794928845","https://openalex.org/W2769740009","https://openalex.org/W2487032012","https://openalex.org/W2211355040","https://openalex.org/W2176311362","https://openalex.org/W2031250218","https://openalex.org/W1501601012"],"abstract_inverted_index":{"The":[0,31,48,131],"goal":[1],"of":[2,23,35,127,133,140],"record":[3,103,128],"linkage":[4,104,129],"and":[5,28,40,73],"deduplication":[6],"is":[7,66,88,135],"to":[8,13,43,56,90,101],"detect":[9],"which":[10],"records":[11],"belong":[12],"the":[14,21,24,41,58,92,114,139],"same":[15],"object":[16],"in":[17,82],"data":[18,46,71],"sets":[19],"where":[20],"identifiers":[22],"objects":[25],"contain":[26],"errors":[27],"missing":[29],"values.":[30],"main":[32],"design":[33],"considerations":[34],"reclin2":[36,134],"are:":[37],"modularity/flexibility,":[38],"speed":[39],"ability":[42],"handle":[44],"large":[45,85],"sets.":[47],"first":[49],"points":[50],"makes":[51],"it":[52,87],"easy":[53],"for":[54,106,110,144],"users":[55],"extend":[57],"package":[59,116,121],"with":[60],"custom":[61],"process":[62],"steps.":[63],"This":[64],"flexibility":[65],"obtained":[67],"by":[68,74],"using":[69],"simple":[70],"structures":[72],"following":[75],"as":[76,78],"close":[77],"possible":[79,89],"common":[80],"interfaces":[81],"R.":[83],"For":[84],"problems":[86],"distribute":[91],"work":[93],"over":[94],"multiple":[95],"worker":[96],"nodes.":[97],"A":[98],"benchmark":[99],"comparison":[100],"other":[102],"packages":[105],"R,":[107],"shows":[108],"that":[109],"this":[111,120],"specific":[112,125],"benchmark,":[113],"fastLink":[115,141],"performs":[117,123],"best.":[118],"However,":[119],"only":[122],"one":[124],"type":[126],"model.":[130],"performance":[132],"not":[136],"far":[137],"behind":[138],"while":[142],"allowing":[143],"much":[145],"greater":[146],"flexibility.":[147]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4306659853","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-01-08T05:35:17.895346","created_date":"2022-10-18"}