{"id":"https://openalex.org/W2026469784","doi":"https://doi.org/10.1145/1519103.1519108","title":"Building query optimizers for information extraction","display_name":"Building query optimizers for information extraction","publication_year":2009,"publication_date":"2009-03-20","ids":{"openalex":"https://openalex.org/W2026469784","doi":"https://doi.org/10.1145/1519103.1519108","mag":"2026469784"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/1519103.1519108","pdf_url":null,"source":{"id":"https://openalex.org/S47508943","display_name":"ACM SIGMOD Record","issn_l":"0163-5808","issn":["0163-5808","1943-5835"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110209854","display_name":"Alpa Jain","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alpa Jain","raw_affiliation_strings":["Columbia University"],"affiliations":[{"raw_affiliation_string":"Columbia University","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010731709","display_name":"Panagiotis G. Ipeirotis","orcid":"https://orcid.org/0000-0002-2966-7402"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Panagiotis Ipeirotis","raw_affiliation_strings":["New York University."],"affiliations":[{"raw_affiliation_string":"New York University.","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080063580","display_name":"Luis Gravano","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Luis Gravano","raw_affiliation_strings":["Columbia University"],"affiliations":[{"raw_affiliation_string":"Columbia University","institution_ids":["https://openalex.org/I78577930"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":6.384,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":18,"citation_normalized_percentile":{"value":0.928441,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":88,"max":89},"biblio":{"volume":"37","issue":"4","first_page":"28","last_page":"34"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9986,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9985,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4301988}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9231571},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.593863},{"id":"https://openalex.org/C192939062","wikidata":"https://www.wikidata.org/wiki/Q104840822","display_name":"Sargable","level":4,"score":0.5789483},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.56986666},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5582186},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.54236954},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.5330765},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5035512},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.46472982},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4301988},{"id":"https://openalex.org/C118689300","wikidata":"https://www.wikidata.org/wiki/Q7978614","display_name":"Web query classification","level":4,"score":0.411524},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.36093855},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.34349743},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3330635},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.23790136},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16920793},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/1519103.1519108","pdf_url":null,"source":{"id":"https://openalex.org/S47508943","display_name":"ACM SIGMOD Record","issn_l":"0163-5808","issn":["0163-5808","1943-5835"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, justice, and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.42}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":23,"referenced_works":["https://openalex.org/W1489949474","https://openalex.org/W157725869","https://openalex.org/W158294556","https://openalex.org/W167355512","https://openalex.org/W2035266017","https://openalex.org/W2045463753","https://openalex.org/W2050078085","https://openalex.org/W2090765848","https://openalex.org/W2096797897","https://openalex.org/W2096891167","https://openalex.org/W2103224511","https://openalex.org/W2103931177","https://openalex.org/W2108126629","https://openalex.org/W2115461474","https://openalex.org/W2125943921","https://openalex.org/W2129214100","https://openalex.org/W2143349571","https://openalex.org/W2144416276","https://openalex.org/W2155737120","https://openalex.org/W2168561231","https://openalex.org/W2426119782","https://openalex.org/W2753710282","https://openalex.org/W3149201378"],"related_works":["https://openalex.org/W3125756434","https://openalex.org/W2572349046","https://openalex.org/W2392799717","https://openalex.org/W2146885082","https://openalex.org/W2124814993","https://openalex.org/W2113390685","https://openalex.org/W2096359267","https://openalex.org/W2026738364","https://openalex.org/W2017989738","https://openalex.org/W1981131819"],"abstract_inverted_index":{"Text":[0],"documents":[1],"often":[2],"embed":[3],"data":[4,12],"that":[5,130],"is":[6,13],"structured":[7,11,23,33,48],"in":[8,60],"nature.":[9],"This":[10,38],"increasingly":[14],"exposed":[15],"using":[16],"information":[17],"extraction":[18],"systems":[19],",":[20],"which":[21,44],"generate":[22],"relations":[24,51,80,87],"from":[25,53,81],"documents,":[26,78,83],"introducing":[27],"an":[28,131,157],"opportunity":[29],"to":[30,135],"process":[31],"expressive,":[32],"queries":[34,49,89],"over":[35,50],"text":[36,54,77],"databases.":[37],"paper":[39],"discusses":[40],"our":[41,61],"SQoUT1":[42],"project,":[43],"focuses":[45],"on":[46,164],"processing":[47,65],"extracted":[52,86],"databases":[55],".":[56],"We":[57,112],"show":[58],"how,":[59],"extraction-based":[62],"scenario,":[63],"query":[64,109,126,162],"can":[66],"be":[67],"decomposed":[68],"into":[69,144],"a":[70,104,125,165],"sequence":[71],"of":[72,94,107,124,169],"basic":[73],"steps:":[74],"retrieving":[75],"relevant":[76],"extracting":[79],"the":[82,120,146,170],"and":[84,100,116,128,152,155],"joining":[85],"for":[88,149,160],"involving":[90],"multiple":[91],"relations.":[92],"Each":[93],"these":[95],"steps":[96],"presents":[97],"different":[98],"alternatives":[99],"together":[101],"they":[102],"form":[103],"rich":[105],"space":[106],"possible":[108],"execution":[110,114,150,158,172],"strategies.":[111,173],"identify":[113],"efficiency":[115,151],"output":[117,153],"quality":[118],"as":[119],"two":[121],"critical":[122],"properties":[123],"execution,":[127],"argue":[129],"optimization":[132],"approach":[133],"needs":[134],"consider":[136],"both":[137],"properties.":[138],"To":[139],"this":[140],"end,":[141],"we":[142],"take":[143],"account":[145],"userspecified":[147],"requirements":[148],"quality,":[154],"choose":[156],"strategy":[159],"each":[161],"based":[163],"principled,":[166],"cost-based":[167],"comparison":[168],"alternative":[171]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2026469784","counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":4}],"updated_date":"2024-12-09T06:22:09.292026","created_date":"2016-06-24"}