{"id":"https://openalex.org/W2946504770","doi":"https://doi.org/10.1145/3308558.3313578","title":"Auto-EM: End-to-end Fuzzy Entity-Matching using Pre-trained Deep Models and Transfer Learning","display_name":"Auto-EM: End-to-end Fuzzy Entity-Matching using Pre-trained Deep Models and Transfer Learning","publication_year":2019,"publication_date":"2019-05-13","ids":{"openalex":"https://openalex.org/W2946504770","doi":"https://doi.org/10.1145/3308558.3313578","mag":"2946504770"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3308558.3313578","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100767050","display_name":"Chen Zhao","orcid":"https://orcid.org/0000-0002-6400-0048"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"funder","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chen Zhao","raw_affiliation_strings":["University of Maryland, USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland, USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034908019","display_name":"Yeye He","orcid":"https://orcid.org/0000-0003-2824-5299"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"funder","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yeye He","raw_affiliation_strings":["Microsoft, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft, USA","institution_ids":["https://openalex.org/I1290206253"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":15.679,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":103,"citation_normalized_percentile":{"value":0.946411,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2413","last_page":"2424"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9985,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9785,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.72876996},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.4986272},{"id":"https://openalex.org/keywords/linkage","display_name":"Linkage (software)","score":0.48488933},{"id":"https://openalex.org/keywords/record-linkage","display_name":"Record Linkage","score":0.43522394}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.76049924},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.72876996},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.68153036},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6291598},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.59512997},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5918336},{"id":"https://openalex.org/C58166","wikidata":"https://www.wikidata.org/wiki/Q224821","display_name":"Fuzzy logic","level":2,"score":0.57910335},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.56556565},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.4986272},{"id":"https://openalex.org/C2776650193","wikidata":"https://www.wikidata.org/wiki/Q264661","display_name":"Obstacle","level":2,"score":0.49772456},{"id":"https://openalex.org/C31266012","wikidata":"https://www.wikidata.org/wiki/Q6554340","display_name":"Linkage (software)","level":3,"score":0.48488933},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.46233293},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.46209708},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.44344696},{"id":"https://openalex.org/C142210648","wikidata":"https://www.wikidata.org/wiki/Q1266546","display_name":"Record linkage","level":3,"score":0.43522394},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.18337119},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3308558.3313578","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.49}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":73,"referenced_works":["https://openalex.org/W102708294","https://openalex.org/W1529533208","https://openalex.org/W1742677423","https://openalex.org/W1832693441","https://openalex.org/W1840435438","https://openalex.org/W1958077162","https://openalex.org/W1964189668","https://openalex.org/W2005708641","https://openalex.org/W2006199065","https://openalex.org/W2016753842","https://openalex.org/W2020278455","https://openalex.org/W2022166150","https://openalex.org/W2041439319","https://openalex.org/W2053062910","https://openalex.org/W2067566391","https://openalex.org/W2080133951","https://openalex.org/W2094728533","https://openalex.org/W2104511295","https://openalex.org/W2106895292","https://openalex.org/W2108991785","https://openalex.org/W2117130368","https://openalex.org/W2119320829","https://openalex.org/W2123838014","https://openalex.org/W2133676910","https://openalex.org/W2136189984","https://openalex.org/W2138105615","https://openalex.org/W2142920810","https://openalex.org/W2164456230","https://openalex.org/W2164618618","https://openalex.org/W2164625277","https://openalex.org/W2165698076","https://openalex.org/W2171472464","https://openalex.org/W2243524811","https://openalex.org/W2250539671","https://openalex.org/W2251264718","https://openalex.org/W2251818205","https://openalex.org/W2295582178","https://openalex.org/W2300469216","https://openalex.org/W2326452612","https://openalex.org/W2413794162","https://openalex.org/W2469060249","https://openalex.org/W2470673105","https://openalex.org/W2542998387","https://openalex.org/W2574230393","https://openalex.org/W2608787653","https://openalex.org/W2616147950","https://openalex.org/W2626778328","https://openalex.org/W2763940251","https://openalex.org/W2798323405","https://openalex.org/W2798546256","https://openalex.org/W2798649495","https://openalex.org/W2798812533","https://openalex.org/W2808633496","https://openalex.org/W2884499287","https://openalex.org/W2893303656","https://openalex.org/W2899771611","https://openalex.org/W2950133940","https://openalex.org/W2952729433","https://openalex.org/W2957204582","https://openalex.org/W2962739339","https://openalex.org/W2963026768","https://openalex.org/W2963341956","https://openalex.org/W2963626623","https://openalex.org/W2963846996","https://openalex.org/W2964308564","https://openalex.org/W3004104752","https://openalex.org/W3011807731","https://openalex.org/W3099023595","https://openalex.org/W3099883947","https://openalex.org/W3122775348","https://openalex.org/W4246012463","https://openalex.org/W4300906944","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W3196421258","https://openalex.org/W3179968364","https://openalex.org/W3171384686","https://openalex.org/W2938107654","https://openalex.org/W2544610230","https://openalex.org/W2284149529","https://openalex.org/W2151749779","https://openalex.org/W2113336906","https://openalex.org/W1971084186","https://openalex.org/W1532588365"],"abstract_inverted_index":{"Entity":[0],"matching":[1],"(EM),":[2],"also":[3],"known":[4],"as":[5],"entity":[6],"resolution,":[7],"fuzzy":[8],"join,":[9],"and":[10,34,40],"record":[11],"linkage,":[12],"refers":[13],"to":[14,21,89,97],"the":[15,22,51],"process":[16],"of":[17,53,66],"identifying":[18],"records":[19],"corresponding":[20],"same":[23],"real-world":[24,72],"entities":[25],"from":[26],"different":[27],"data":[28,38,41,68],"sources.":[29],"It":[30],"is":[31,69,87],"an":[32],"important":[33],"long-standing":[35],"problem":[36],"in":[37,50],"integration":[39],"mining.":[42],"So":[43],"far":[44],"progresses":[45],"have":[46],"been":[47],"made":[48],"mainly":[49],"form":[52],"model":[54],"improvements,":[55],"where":[56],"models":[57],"with":[58],"better":[59],"accuracy":[60],"are":[61],"developed":[62],"when":[63],"large":[64],"amounts":[65],"training":[67],"available.":[70],"In":[71],"applications":[73],"we":[74],"find":[75],"that":[76,86],"advanced":[77],"approaches":[78],"can":[79],"often":[80],"require":[81],"too":[82],"many":[83],"labeled":[84],"examples":[85],"expensive":[88],"obtain,":[90],"which":[91],"has":[92],"become":[93],"a":[94],"key":[95],"obstacle":[96],"wider":[98],"adoption.":[99]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2946504770","counts_by_year":[{"year":2024,"cited_by_count":16},{"year":2023,"cited_by_count":18},{"year":2022,"cited_by_count":18},{"year":2021,"cited_by_count":29},{"year":2020,"cited_by_count":19},{"year":2019,"cited_by_count":3}],"updated_date":"2025-02-18T08:22:17.970282","created_date":"2019-05-29"}