{"id":"https://openalex.org/W3003583619","doi":"https://doi.org/10.5753/sbbd.2019.8820","title":"Industrial Paper: Large-scale Record Linkage of Web-based Place Entities","display_name":"Industrial Paper: Large-scale Record Linkage of Web-based Place Entities","publication_year":2019,"publication_date":"2019-10-07","ids":{"openalex":"https://openalex.org/W3003583619","doi":"https://doi.org/10.5753/sbbd.2019.8820","mag":"3003583619"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.5753/sbbd.2019.8820","pdf_url":"https://sol.sbc.org.br/index.php/sbbd/article/download/8820/8721","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://sol.sbc.org.br/index.php/sbbd/article/download/8820/8721","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048012094","display_name":"Vin\u00edcius M. R. Cousseau","orcid":null},"institutions":[{"id":"https://openalex.org/I4210159576","display_name":"Indigenous Language Institute","ror":"https://ror.org/05213zp13","country_code":"US","type":"other","lineage":["https://openalex.org/I4210159576"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vin\u00edcius M. R. Cousseau","raw_affiliation_strings":["In-Loco"],"affiliations":[{"raw_affiliation_string":"In-Loco","institution_ids":["https://openalex.org/I4210159576"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000683692","display_name":"Luciano Barbosa","orcid":"https://orcid.org/0000-0002-6858-4773"},"institutions":[{"id":"https://openalex.org/I25112270","display_name":"Universidade Federal de Pernambuco","ror":"https://ror.org/047908t24","country_code":"BR","type":"education","lineage":["https://openalex.org/I25112270"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Luciano Barbosa","raw_affiliation_strings":["Centro de Inform\u00e1tica, Universidade Federal de Pernambuco, Recife -PE -Brazil"],"affiliations":[{"raw_affiliation_string":"Centro de Inform\u00e1tica, Universidade Federal de Pernambuco, Recife -PE -Brazil","institution_ids":["https://openalex.org/I25112270"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.227,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":1,"citation_normalized_percentile":{"value":0.335495,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":61,"max":69},"biblio":{"volume":null,"issue":null,"first_page":"181","last_page":"186"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9502,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9494,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/linkage","display_name":"Linkage (software)","score":0.63456386},{"id":"https://openalex.org/keywords/record-linkage","display_name":"Record Linkage","score":0.5494366},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization","score":0.5047177}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7524072},{"id":"https://openalex.org/C31266012","wikidata":"https://www.wikidata.org/wiki/Q6554340","display_name":"Linkage (software)","level":3,"score":0.63456386},{"id":"https://openalex.org/C142210648","wikidata":"https://www.wikidata.org/wiki/Q1266546","display_name":"Record linkage","level":3,"score":0.5494366},{"id":"https://openalex.org/C58754882","wikidata":"https://www.wikidata.org/wiki/Q1502887","display_name":"Geodetic datum","level":2,"score":0.5487769},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.53575766},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.5047177},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.45408678},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.45335072},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4137133},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.40146896},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.38323277},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3342604},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.33408517},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08832356},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.07393575},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.5753/sbbd.2019.8820","pdf_url":"https://sol.sbc.org.br/index.php/sbbd/article/download/8820/8721","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.5753/sbbd.2019.8820","pdf_url":"https://sol.sbc.org.br/index.php/sbbd/article/download/8820/8721","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.69,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":5,"referenced_works":["https://openalex.org/W2031250218","https://openalex.org/W2047197963","https://openalex.org/W2163756102","https://openalex.org/W2767216760","https://openalex.org/W4291172695"],"related_works":["https://openalex.org/W4310568775","https://openalex.org/W3211905090","https://openalex.org/W3012491082","https://openalex.org/W2964093098","https://openalex.org/W2808916796","https://openalex.org/W2487032012","https://openalex.org/W2211355040","https://openalex.org/W2178148352","https://openalex.org/W2176311362","https://openalex.org/W1936317645"],"abstract_inverted_index":{"Extracting":[0],"data":[1,79],"about":[2],"entities":[3],"from":[4,51],"the":[5,11,46,58,72,77],"Web":[6],"has":[7],"become":[8],"commonplace":[9],"in":[10,34,76],"industry":[12],"and":[13,97],"academia":[14],"alike.":[15],"Web-based":[16],"entities,":[17],"however,":[18],"are":[19],"inherently":[20],"noisy":[21],"and,":[22],"as":[23],"such,":[24],"introduce":[25],"several":[26],"normalization":[27],"issues":[28],"which":[29,43,89],"must":[30],"be":[31],"attended":[32],"to":[33,36,45],"order":[35],"maintain":[37],"a":[38,67,87,91],"clean":[39],"database.":[40],"Record":[41],"linkage,":[42],"refers":[44],"detection":[47],"of":[48,57,61,95],"replicated":[49],"datum":[50],"possibly":[52],"multiple":[53],"sources,":[54],"is":[55],"one":[56],"most":[59],"critical":[60],"those":[62],"issues.":[63],"This":[64],"paper":[65],"presents":[66],"practical":[68],"approach":[69],"for":[70],"solving":[71],"record":[73],"linkage":[74],"problem":[75],"places":[78],"domain":[80],"at":[81],"an":[82,98],"industrial":[83],"scale,":[84],"displaying":[85],"both":[86],"model":[88],"reaches":[90],"normalized":[92],"Gini":[93],"coefficient":[94],"0.92,":[96],"architecture":[99],"that":[100],"supports":[101],"large-scale":[102],"processing.":[103]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3003583619","counts_by_year":[{"year":2021,"cited_by_count":1}],"updated_date":"2024-12-18T16:53:00.905864","created_date":"2020-02-07"}