{"id":"https://openalex.org/W2079973394","doi":"https://doi.org/10.1145/1859127.1859139","title":"Popularity-guided top- k extraction of entity attributes","display_name":"Popularity-guided top- k extraction of entity attributes","publication_year":2010,"publication_date":"2010-06-06","ids":{"openalex":"https://openalex.org/W2079973394","doi":"https://doi.org/10.1145/1859127.1859139","mag":"2079973394"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/1859127.1859139","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080229345","display_name":"Matthew Solomon","orcid":"https://orcid.org/0000-0002-3285-5842"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matthew Solomon","raw_affiliation_strings":["Columbia University"],"affiliations":[{"raw_affiliation_string":"Columbia University","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043275971","display_name":"Cong Yu","orcid":"https://orcid.org/0000-0001-7331-2345"},"institutions":[{"id":"https://openalex.org/I2800095910","display_name":"Yahoo (Spain)","ror":"https://ror.org/03gq8sg42","country_code":"ES","type":"company","lineage":["https://openalex.org/I2800095910","https://openalex.org/I4210134091"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Cong Yu","raw_affiliation_strings":["Yahoo! research,"],"affiliations":[{"raw_affiliation_string":"Yahoo! research,","institution_ids":["https://openalex.org/I2800095910"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080063580","display_name":"Luis Gravano","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Luis Gravano","raw_affiliation_strings":["Columbia University"],"affiliations":[{"raw_affiliation_string":"Columbia University","institution_ids":["https://openalex.org/I78577930"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.972,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.651405,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":82,"max":83},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9973,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9954,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/popularity","display_name":"Popularity","score":0.6335128},{"id":"https://openalex.org/keywords/data-extraction","display_name":"Data extraction","score":0.43775356}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8407051},{"id":"https://openalex.org/C2780586970","wikidata":"https://www.wikidata.org/wiki/Q1357284","display_name":"Popularity","level":2,"score":0.6335128},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.61373407},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.57975894},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.51451707},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.50480926},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.48492983},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.43775356},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.42127955},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.38078278},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/1859127.1859139","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[{"funder":"https://openalex.org/F4320337389","funder_display_name":"Division of Information and Intelligent Systems","award_id":"IIS-08-11038"}],"datasets":[],"versions":[],"referenced_works_count":12,"referenced_works":["https://openalex.org/W136059505","https://openalex.org/W1493490255","https://openalex.org/W157725869","https://openalex.org/W158294556","https://openalex.org/W2011039300","https://openalex.org/W2099797738","https://openalex.org/W2103931177","https://openalex.org/W2104896715","https://openalex.org/W2128384372","https://openalex.org/W2129214100","https://openalex.org/W2134206624","https://openalex.org/W2138621811"],"related_works":["https://openalex.org/W4294565801","https://openalex.org/W2952704802","https://openalex.org/W2741781807","https://openalex.org/W2518037665","https://openalex.org/W2477036161","https://openalex.org/W2384861574","https://openalex.org/W2368605798","https://openalex.org/W2368049389","https://openalex.org/W2348524959","https://openalex.org/W2170801710"],"abstract_inverted_index":{"Recent":[0],"progress":[1],"in":[2,21,194],"information":[3,80,114,187],"extraction":[4,27,81,106,115,129,172,209],"technology":[5],"has":[6],"enabled":[7],"a":[8,117,127,160,202,229],"vast":[9],"array":[10],"of":[11,28,55,145,152,182,196,222],"applications":[12,39],"that":[13,18,132],"rely":[14],"on":[15,51,97],"structured":[16,42],"data":[17],"is":[19,85,116],"embedded":[20],"natural-language":[22],"text.":[23],"In":[24,46],"particular,":[25],"the":[26,31,83,98,135,149,153,156,171,180,183,186,191,215,220],"concepts":[29],"from":[30,75,174,201],"Web---with":[32],"their":[33,64],"desired":[34],"attributes---is":[35],"important":[36,53],"to":[37,44,69,159,213],"provide":[38],"with":[40,228],"rich,":[41],"access":[43],"information.":[45],"this":[47],"paper,":[48],"we":[49,147,226],"focus":[50],"an":[52],"family":[54],"concepts,":[56],"namely,":[57],"entities":[58,221],"(e.g.,":[59],"people":[60],"or":[61,102],"organizations)":[62],"and":[63,66,71,90,137,143],"attributes,":[65],"study":[67],"how":[68],"efficiently":[70],"effectively":[72],"extract":[73],"them":[74],"Web-accessible":[76],"text":[77],"documents.":[78],"Unfortunately,":[79],"over":[82,233],"Web":[84,99],"challenging":[86],"for":[87,140,155,163,219],"both":[88,134],"quality":[89,136],"efficiency":[91,138],"reasons.":[92],"Regarding":[93,112],"quality,":[94],"many":[95],"sources":[96],"contain":[100],"misleading":[101],"invalid":[103],"information;":[104],"furthermore,":[105],"systems":[107],"often":[108,120],"return":[109,148],"incorrect":[110],"data.":[111,235],"efficiency,":[113],"time-consuming":[118],"process,":[119],"involving":[121],"expensive":[122],"text-processing":[123],"steps.":[124],"We":[125,189],"present":[126],"top-k":[128,150,208],"processing":[130,210],"approach":[131,211],"addresses":[133],"challenges:":[139],"each":[141],"entity":[142,157],"attribute":[144,154,165,217],"interest,":[146],"values":[151,218],"according":[158],"scoring":[161,168],"function":[162,169],"extracted":[164],"values.":[166],"This":[167],"weighs":[170],"confidence":[173],"individual":[175],"documents,":[176],"as":[177,179,225],"well":[178],"\"importance\"":[181],"documents":[184],"where":[185],"originates.":[188],"define":[190],"document":[192,198],"importance":[193],"terms":[195],"entity-specific":[197],"\"popularity\"":[199],"statistics":[200],"major":[203],"search":[204],"engine.":[205],"Overall,":[206],"our":[207],"manages":[212],"identify":[214],"top":[216],"interest":[223],"efficiently,":[224],"demonstrate":[227],"large-scale":[230],"experimental":[231],"evaluation":[232],"real-life":[234]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2079973394","counts_by_year":[{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2025-01-21T06:09:02.103053","created_date":"2016-06-24"}