{"id":"https://openalex.org/W2911568967","doi":"https://doi.org/10.1109/icdmw.2018.00083","title":"Large Database Schema Matching using Data Mining Techniques","display_name":"Large Database Schema Matching using Data Mining Techniques","publication_year":2018,"publication_date":"2018-11-01","ids":{"openalex":"https://openalex.org/W2911568967","doi":"https://doi.org/10.1109/icdmw.2018.00083","mag":"2911568967"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdmw.2018.00083","pdf_url":null,"source":{"id":"https://openalex.org/S4363608174","display_name":"2022 IEEE International Conference on Data Mining Workshops (ICDMW)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068340505","display_name":"Debora Gomes dos Reis","orcid":null},"institutions":[{"id":"https://openalex.org/I150729083","display_name":"Universidade de Bras\u00edlia","ror":"https://ror.org/02xfp8v59","country_code":"BR","type":"funder","lineage":["https://openalex.org/I150729083"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Debora Gomes dos Reis","raw_affiliation_strings":["dept. Computer Science, University of Brasilia (UnB), Brasilia, DF, Brazil"],"affiliations":[{"raw_affiliation_string":"dept. Computer Science, University of Brasilia (UnB), Brasilia, DF, Brazil","institution_ids":["https://openalex.org/I150729083"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106404831","display_name":"Marcelo Ladeira","orcid":"https://orcid.org/0000-0003-1542-6293"},"institutions":[{"id":"https://openalex.org/I150729083","display_name":"Universidade de Bras\u00edlia","ror":"https://ror.org/02xfp8v59","country_code":"BR","type":"funder","lineage":["https://openalex.org/I150729083"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Marcelo Ladeira","raw_affiliation_strings":["dept. Computer Science, University of Brasilia (UnB), Brasilia, DF, Brazil"],"affiliations":[{"raw_affiliation_string":"dept. Computer Science, University of Brasilia (UnB), Brasilia, DF, Brazil","institution_ids":["https://openalex.org/I150729083"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054089208","display_name":"Maristela Holanda","orcid":"https://orcid.org/0000-0002-0883-2579"},"institutions":[{"id":"https://openalex.org/I150729083","display_name":"Universidade de Bras\u00edlia","ror":"https://ror.org/02xfp8v59","country_code":"BR","type":"funder","lineage":["https://openalex.org/I150729083"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Maristela Holanda","raw_affiliation_strings":["dept. Computer Science, University of Brasilia (UnB), Brasilia, DF, Brazil"],"affiliations":[{"raw_affiliation_string":"dept. Computer Science, University of Brasilia (UnB), Brasilia, DF, Brazil","institution_ids":["https://openalex.org/I150729083"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026224413","display_name":"M\u00e1rcio Victorino","orcid":"https://orcid.org/0000-0003-2785-8958"},"institutions":[{"id":"https://openalex.org/I150729083","display_name":"Universidade de Bras\u00edlia","ror":"https://ror.org/02xfp8v59","country_code":"BR","type":"funder","lineage":["https://openalex.org/I150729083"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Marcio de Carvalho Victorino","raw_affiliation_strings":["Faculty of Information Science, University of Brasilia (UnB), Brasilia, DF, Brazil"],"affiliations":[{"raw_affiliation_string":"Faculty of Information Science, University of Brasilia (UnB), Brasilia, DF, Brazil","institution_ids":["https://openalex.org/I150729083"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.55,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":5,"citation_normalized_percentile":{"value":0.51634,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":79,"max":80},"biblio":{"volume":null,"issue":null,"first_page":"523","last_page":"530"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9995,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9994,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.48088506},{"id":"https://openalex.org/keywords/star-schema","display_name":"Star schema","score":0.4747715},{"id":"https://openalex.org/keywords/information-schema","display_name":"Information schema","score":0.47252518}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.76239884},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.6625656},{"id":"https://openalex.org/C30775581","wikidata":"https://www.wikidata.org/wiki/Q632285","display_name":"Database schema","level":3,"score":0.5902783},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.53842604},{"id":"https://openalex.org/C56310702","wikidata":"https://www.wikidata.org/wiki/Q2269281","display_name":"Semi-structured model","level":4,"score":0.5065297},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.48088506},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.47957557},{"id":"https://openalex.org/C190703929","wikidata":"https://www.wikidata.org/wiki/Q1331138","display_name":"Star schema","level":4,"score":0.4747715},{"id":"https://openalex.org/C150012506","wikidata":"https://www.wikidata.org/wiki/Q6031185","display_name":"Information schema","level":5,"score":0.47252518},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4504373},{"id":"https://openalex.org/C153048206","wikidata":"https://www.wikidata.org/wiki/Q3454922","display_name":"Metadata repository","level":3,"score":0.41737863},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.40566325},{"id":"https://openalex.org/C148840519","wikidata":"https://www.wikidata.org/wiki/Q1049878","display_name":"Database design","level":2,"score":0.34945577},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0845885}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdmw.2018.00083","pdf_url":null,"source":{"id":"https://openalex.org/S4363608174","display_name":"2022 IEEE International Conference on Data Mining Workshops (ICDMW)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.63}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":14,"referenced_works":["https://openalex.org/W1533117389","https://openalex.org/W1547612978","https://openalex.org/W1998982581","https://openalex.org/W2008896880","https://openalex.org/W2085478182","https://openalex.org/W2095708598","https://openalex.org/W2111998194","https://openalex.org/W2120718782","https://openalex.org/W2138745488","https://openalex.org/W2139135093","https://openalex.org/W2160683489","https://openalex.org/W2168996210","https://openalex.org/W2776474505","https://openalex.org/W2783642575"],"related_works":["https://openalex.org/W4238907651","https://openalex.org/W2153512431","https://openalex.org/W2103472145","https://openalex.org/W2092058806","https://openalex.org/W1992271858","https://openalex.org/W1985081702","https://openalex.org/W166845585","https://openalex.org/W1591654213","https://openalex.org/W1513459549","https://openalex.org/W1512552654"],"abstract_inverted_index":{"With":[0],"the":[1,58,67,70,84,91,99,107,236,244,247,260,270,289,293,307],"expanding":[2],"diversity":[3],"of":[4,44,57,62,69,75,86,105,162,216,246,292,300,309,327,329,345],"database":[5,8],"technologies":[6],"and":[7,195,211,227,268,296,319,348],"sizes,":[9],"it":[10,264,341],"is":[11,52,64,79,114,173,209,240,339],"becoming":[12],"increasingly":[13],"hard":[14],"to":[15,34,39,66,83,97,109,184,242,287,306],"identify":[16,41],"similar":[17,42,125,165,272],"relational":[18,45],"databases":[19,23,46],"among":[20],"many":[21],"large":[22],"stored":[24],"in":[25,81,159,180,277,323],"different":[26,204],"Database":[27],"Management":[28],"Systems":[29],"(DBMS).":[30],"Therefore,":[31],"we":[32,95],"propose":[33,96],"use":[35],"data":[36,302],"mining":[37],"techniques":[38],"automatically":[40],"structures":[43],"by":[47,54,274],"comparing":[48,214],"their":[49],"metadata,":[50,187],"which":[51,113,304],"composed":[53],"physical":[55],"details":[56],"databases.":[59],"The":[60,73,139,154,206,219,282],"amount":[61],"metadata":[63,207,284,294],"proportional":[65],"size":[68],"schema":[71,100,248,267,273,346],"structure.":[72],"possibilities":[74],"combinations":[76],"for":[77,90,150,213],"comparison":[78],"quadratic":[80],"relation":[82],"number":[85],"schemas":[87,108,119,127,144,148,158,163,178,200,316],"analyzed.":[88],"Looking":[89],"most":[92,250,261,271],"efficient":[93,262],"technique,":[94],"calculate":[98],"similarity":[101],"evaluating":[102],"a":[103,115,160,217,301,324,343],"distance":[104],"all":[106,143,146,266],"just":[110],"one":[111,253],"schema,":[112],"start":[116],"point.":[117],"Obviously":[118],"with":[120,128,164,175],"close":[121],"distances":[122],"are":[123,222,230],"more":[124,190],"than":[126,191,279],"bigger":[129],"distances.":[130],"We":[131],"compare":[132],"this":[133,313],"proposal":[134],"against":[135,145],"two":[136],"other":[137],"approaches.":[138],"first":[140,290],"approach":[141,156],"compares":[142,157],"another":[147],"except":[149],"its":[151,275],"inverse":[152],"comparison.":[153],"second":[155],"group":[161],"sizes.":[166],"To":[167],"validate":[168],"our":[169,258],"proposal,":[170],"an":[171,297],"experiment":[172],"performed":[174],"354":[176],"real":[177],"ranging":[179],"sizes":[181,347],"from":[182,202],"2":[183,280],"20":[185],"thousand":[186,193,197],"totaling":[188],"together":[189],"26":[192],"tables":[194],"238":[196],"columns.":[198],"Those":[199],"came":[201],"5":[203],"DBMS.":[205,349],"extracted":[208,283],"transformed":[210],"formatted":[212],"pairs":[215],"schema.":[218],"textual":[220],"features":[221,229],"compared":[223,231,265],"using":[224,232],"Cosine":[225],"Distance":[226],"numerical":[228],"Euclidean":[233],"Distance.":[234],"Then,":[235],"hierarchical":[237],"cluster":[238],"technique":[239],"used":[241,286],"facilitate":[243],"visualization":[245],"that":[249],"closely":[251],"resembled":[252],"another.":[254],"Results":[255],"showed":[256],"that,":[257],"was":[259,285],"because":[263],"identified":[269],"structure":[276],"less":[278],"minutes.":[281],"create":[288],"version":[291,299],"repository":[295],"initial":[298],"catalog,":[303],"contributed":[305],"knowledge":[308],"existing":[310],"data.":[311],"Using":[312],"procedure,":[314],"duplicated":[315],"were":[317],"discovered":[318],"then":[320],"discontinued,":[321],"resulting":[322],"cost":[325,330],"savings":[326],"10%":[328],"savings,":[331],"while":[332],"freeing":[333],"up":[334],"infrastructure":[335],"resources.":[336],"This":[337],"solution":[338],"flexible,":[340],"supports":[342],"variety":[344]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2911568967","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3}],"updated_date":"2025-02-22T21:18:33.178922","created_date":"2019-02-21"}