{"id":"https://openalex.org/W2917275779","doi":"https://doi.org/10.1587/transinf.2018edl8130","title":"Mining Approximate Primary Functional Dependency on Web Tables","display_name":"Mining Approximate Primary Functional Dependency on Web Tables","publication_year":2019,"publication_date":"2019-02-28","ids":{"openalex":"https://openalex.org/W2917275779","doi":"https://doi.org/10.1587/transinf.2018edl8130","mag":"2917275779"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1587/transinf.2018edl8130","pdf_url":"https://www.jstage.jst.go.jp/article/transinf/E102.D/3/E102.D_2018EDL8130/_pdf","source":{"id":"https://openalex.org/S2486202937","display_name":"IEICE Transactions on Information and Systems","issn_l":"0916-8532","issn":["0916-8532","1745-1361"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4320800604","host_organization_name":"Institute of Electronics, Information and Communication Engineers","host_organization_lineage":["https://openalex.org/P4320800604"],"host_organization_lineage_names":["Institute of Electronics, Information and Communication Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.jstage.jst.go.jp/article/transinf/E102.D/3/E102.D_2018EDL8130/_pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100325167","display_name":"Siyu Chen","orcid":"https://orcid.org/0000-0001-8350-7605"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siyu CHEN","raw_affiliation_strings":["School of Computer and Information Technology, Beijing Jiaotong University"],"affiliations":[{"raw_affiliation_string":"School of Computer and Information Technology, Beijing Jiaotong University","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100387096","display_name":"Ning Wang","orcid":"https://orcid.org/0000-0001-8903-8790"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ning WANG","raw_affiliation_strings":["School of Computer and Information Technology, Beijing Jiaotong University"],"affiliations":[{"raw_affiliation_string":"School of Computer and Information Technology, Beijing Jiaotong University","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100442185","display_name":"Mengmeng Zhang","orcid":"https://orcid.org/0000-0002-2016-741X"},"institutions":[{"id":"https://openalex.org/I1456306","display_name":"North China University of Technology","ror":"https://ror.org/01nky7652","country_code":"CN","type":"education","lineage":["https://openalex.org/I1456306"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengmeng ZHANG","raw_affiliation_strings":["North China University of Technology"],"affiliations":[{"raw_affiliation_string":"North China University of Technology","institution_ids":["https://openalex.org/I1456306"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":62},"biblio":{"volume":"E102.D","issue":"3","first_page":"650","last_page":"654"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Techniques and Applications","score":0.9972,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Techniques and Applications","score":0.9972,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Trajectory Data Mining and Analysis","score":0.9957,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Data Stream Management Systems and Techniques","score":0.9951,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.7149807},{"id":"https://openalex.org/keywords/approximate-query-processing","display_name":"Approximate Query Processing","score":0.568633},{"id":"https://openalex.org/keywords/temporal-data-mining","display_name":"Temporal Data Mining","score":0.527665},{"id":"https://openalex.org/keywords/probabilistic-databases","display_name":"Probabilistic Databases","score":0.524755},{"id":"https://openalex.org/keywords/trajectory-data-mining","display_name":"Trajectory Data Mining","score":0.51237},{"id":"https://openalex.org/keywords/query-optimization","display_name":"Query Optimization","score":0.503688}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.875051},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.7149807},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.6996748},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.59980476},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5780535},{"id":"https://openalex.org/C64869954","wikidata":"https://www.wikidata.org/wiki/Q1859747","display_name":"False positive paradox","level":2,"score":0.52242726},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.50313705},{"id":"https://openalex.org/C2780551164","wikidata":"https://www.wikidata.org/wiki/Q2306599","display_name":"Column (typography)","level":3,"score":0.44706303},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4159422},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39920628},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.27736616},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.18574199},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1587/transinf.2018edl8130","pdf_url":"https://www.jstage.jst.go.jp/article/transinf/E102.D/3/E102.D_2018EDL8130/_pdf","source":{"id":"https://openalex.org/S2486202937","display_name":"IEICE Transactions on Information and Systems","issn_l":"0916-8532","issn":["0916-8532","1745-1361"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4320800604","host_organization_name":"Institute of Electronics, Information and Communication Engineers","host_organization_lineage":["https://openalex.org/P4320800604"],"host_organization_lineage_names":["Institute of Electronics, Information and Communication Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1587/transinf.2018edl8130","pdf_url":"https://www.jstage.jst.go.jp/article/transinf/E102.D/3/E102.D_2018EDL8130/_pdf","source":{"id":"https://openalex.org/S2486202937","display_name":"IEICE Transactions on Information and Systems","issn_l":"0916-8532","issn":["0916-8532","1745-1361"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4320800604","host_organization_name":"Institute of Electronics, Information and Communication Engineers","host_organization_lineage":["https://openalex.org/P4320800604"],"host_organization_lineage_names":["Institute of Electronics, Information and Communication Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":9,"referenced_works":["https://openalex.org/W102383941","https://openalex.org/W2077737382","https://openalex.org/W2092364718","https://openalex.org/W2102489964","https://openalex.org/W2108223890","https://openalex.org/W2184196111","https://openalex.org/W2325504296","https://openalex.org/W2618188015","https://openalex.org/W3106124465"],"related_works":["https://openalex.org/W3209204065","https://openalex.org/W2389214306","https://openalex.org/W2295196644","https://openalex.org/W2183246718","https://openalex.org/W2164205946","https://openalex.org/W2160907113","https://openalex.org/W2099261052","https://openalex.org/W2067317451","https://openalex.org/W1755711892","https://openalex.org/W1557094818"],"abstract_inverted_index":{"We":[0],"propose":[1,46],"to":[2,51,62,78],"discover":[3],"approximate":[4,72],"primary":[5,19],"functional":[6],"dependency":[7],"(aPFD)":[8],"for":[9,28],"web":[10,36,87],"tables,":[11],"which":[12],"focus":[13],"on":[14,35,39,85],"the":[15],"determination":[16],"relationship":[17],"between":[18],"attributes":[20,23],"and":[21,24,32,42,49,58,75,100],"non-primary":[22],"are":[25,76],"more":[26],"helpful":[27],"entity":[29],"column":[30],"detection":[31],"topic":[33],"discovery":[34],"tables.":[37,80],"Based":[38],"association":[40],"rules":[41],"information":[43],"theory,":[44],"we":[45],"metrics":[47],"Conf":[48],"InfoGain":[50],"evaluate":[52],"PFDs.":[53],"By":[54],"quantifying":[55],"PFDs'":[56],"strength":[57],"designing":[59],"pruning":[60],"strategies":[61],"eliminate":[63],"false":[64],"positives,":[65],"our":[66,91],"method":[67,92],"could":[68],"select":[69],"minimal":[70],"non-trivial":[71],"PFD":[73],"effectively":[74],"scalable":[77],"large":[79],"The":[81],"comprehensive":[82],"experimental":[83],"results":[84],"real":[86],"datasets":[88],"show":[89],"that":[90],"significantly":[93],"outperforms":[94],"previous":[95],"work":[96],"in":[97],"both":[98],"effectiveness":[99],"efficiency.":[101]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2917275779","counts_by_year":[],"updated_date":"2024-09-19T11:19:39.683058","created_date":"2019-03-02"}