{"id":"https://openalex.org/W2168561231","doi":"https://doi.org/10.1109/icde.2009.138","title":"Join Optimization of Information Extraction Output: Quality Matters!","display_name":"Join Optimization of Information Extraction Output: Quality Matters!","publication_year":2009,"publication_date":"2009-03-01","ids":{"openalex":"https://openalex.org/W2168561231","doi":"https://doi.org/10.1109/icde.2009.138","mag":"2168561231"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde.2009.138","pdf_url":null,"source":{"id":"https://openalex.org/S4210210321","display_name":"Proceedings - International Conference on Data Engineering","issn_l":"1084-4627","issn":["1084-4627","2375-0286"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110209854","display_name":"Alpa Jain","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alpa Jain","raw_affiliation_strings":["Columbia University, USA"],"affiliations":[{"raw_affiliation_string":"Columbia University, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010731709","display_name":"Panagiotis G. Ipeirotis","orcid":"https://orcid.org/0000-0002-2966-7402"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Panagiotis G. Ipeirotis","raw_affiliation_strings":["New York University, USA"],"affiliations":[{"raw_affiliation_string":"New York University, USA","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110256670","display_name":"AnHai Doan","orcid":null},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"AnHai Doan","raw_affiliation_strings":["University of Wisconsin, Madison, USA"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin, Madison, USA","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080063580","display_name":"Luis Gravano","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Luis Gravano","raw_affiliation_strings":["Columbia University, USA"],"affiliations":[{"raw_affiliation_string":"Columbia University, USA","institution_ids":["https://openalex.org/I78577930"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":18.196,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":20,"citation_normalized_percentile":{"value":0.936706,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":88,"max":89},"biblio":{"volume":null,"issue":null,"first_page":"186","last_page":"197"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/join","display_name":"Join (topology)","score":0.91425306}],"concepts":[{"id":"https://openalex.org/C2776124973","wikidata":"https://www.wikidata.org/wiki/Q3183033","display_name":"Join (topology)","level":2,"score":0.91425306},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8452653},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.6819531},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.6152014},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.50733346},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.41162145},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.38417184},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32104933},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.15801162},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde.2009.138","pdf_url":null,"source":{"id":"https://openalex.org/S4210210321","display_name":"Proceedings - International Conference on Data Engineering","issn_l":"1084-4627","issn":["1084-4627","2375-0286"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality education","score":0.41,"id":"https://metadata.un.org/sdg/4"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":18,"referenced_works":["https://openalex.org/W1517178556","https://openalex.org/W158294556","https://openalex.org/W1591609838","https://openalex.org/W2020147322","https://openalex.org/W2035266017","https://openalex.org/W2045463753","https://openalex.org/W2090765848","https://openalex.org/W2092819480","https://openalex.org/W2096797897","https://openalex.org/W2096891167","https://openalex.org/W2100485384","https://openalex.org/W2103931177","https://openalex.org/W2144416276","https://openalex.org/W2155737120","https://openalex.org/W2169015768","https://openalex.org/W2207067200","https://openalex.org/W2426119782","https://openalex.org/W4229903866"],"related_works":["https://openalex.org/W794462722","https://openalex.org/W4392498349","https://openalex.org/W4392216655","https://openalex.org/W4256664196","https://openalex.org/W4205996836","https://openalex.org/W3214148052","https://openalex.org/W2807741550","https://openalex.org/W2151692181","https://openalex.org/W2093960938","https://openalex.org/W2029625042"],"abstract_inverted_index":{"Information":[0],"extraction":[1],"(IE)":[2],"systems":[3,23,83,124],"are":[4,84],"trained":[5],"to":[6,26,47,94,126,135],"extract":[7],"specific":[8],"relations":[9],"from":[10],"text":[11,197],"databases.":[12],"Real-world":[13],"applications":[14],"often":[15],"require":[16],"that":[17,111],"the":[18,28,34,54,57,66,102,112,119,122,130,140,155,162,167,173,183],"output":[19,59,99,113,156],"of":[20,30,36,39,56,61,77,121,160,166,175,185],"multiple":[21,40],"IE":[22,82,123,201],"be":[24],"joined":[25],"produce":[27,74],"data":[29],"interest.":[31],"To":[32],"optimize":[33],"execution":[35,50,71,163,169],"a":[37,91,186,191],"join":[38,58,70,75,103,142,188],"extracted":[41,107],"relations,":[42],"it":[43],"is":[44,60,115],"not":[45],"sufficient":[46],"consider":[48],"only":[49],"time.":[51],"In":[52,86],"fact,":[53],"quality":[55,80,100,114,157],"critical":[62],"importance:":[63],"unlike":[64],"in":[65],"relational":[67],"world,":[68],"different":[69,79],"plans":[72],"can":[73],"results":[76],"widely":[78],"whenever":[81],"involved.":[85],"this":[87],"paper,":[88],"we":[89],"develop":[90],"principled":[92],"approach":[93],"understand,":[95],"estimate,":[96],"and":[97,138,153,199],"incorporate":[98],"into":[101],"optimization":[104],"process":[105,127],"over":[106,195],"relations.":[108],"We":[109,171],"argue":[110],"affected":[116],"by":[117],"(a)":[118],"configuration":[120],"used":[125,134],"documents,":[128,137],"(b)":[129],"document":[131],"retrieval":[132],"strategies":[133],"retrieve":[136],"(c)":[139],"actual":[141],"algorithm":[143],"used.":[144],"Our":[145],"analysis":[146],"considers":[147],"several":[148],"alternatives":[149],"for":[150],"these":[151],"factors,":[152],"predicts":[154],"-":[158,165],"and,":[159],"course,":[161],"time":[164],"alternate":[168],"plans.":[170],"establish":[172],"accuracy":[174],"our":[176],"analytical":[177],"models,":[178],"as":[179,181],"well":[180],"study":[182],"effectiveness":[184],"quality-aware":[187],"optimizer,":[189],"with":[190],"large-scale":[192],"experimental":[193],"evaluation":[194],"real-world":[196],"collections":[198],"state-of-the-art":[200],"systems.":[202]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2168561231","counts_by_year":[{"year":2016,"cited_by_count":1},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":5}],"updated_date":"2025-01-18T14:58:51.016095","created_date":"2016-06-24"}