{"id":"https://openalex.org/W4367046714","doi":"https://doi.org/10.1145/3543507.3583236","title":"Wikidata as a seed for Web Extraction","display_name":"Wikidata as a seed for Web Extraction","publication_year":2023,"publication_date":"2023-04-26","ids":{"openalex":"https://openalex.org/W4367046714","doi":"https://doi.org/10.1145/3543507.3583236"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3543507.3583236","pdf_url":null,"source":{"id":"https://openalex.org/S4363608783","display_name":"Proceedings of the ACM Web Conference 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2401.07812","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036029871","display_name":"Kunpeng Guo","orcid":"https://orcid.org/0000-0002-0692-0057"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210085887","display_name":"Laboratoire Hubert Curien","ror":"https://ror.org/0028p8r67","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I203339264","https://openalex.org/I277688954","https://openalex.org/I4210085887","https://openalex.org/I4210091746","https://openalex.org/I4210095849","https://openalex.org/I59692284","https://openalex.org/I86767153"]},{"id":"https://openalex.org/I86767153","display_name":"Universit\u00e9 Jean Monnet","ror":"https://ror.org/04yznqr36","country_code":"FR","type":"education","lineage":["https://openalex.org/I203339264","https://openalex.org/I86767153"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Kunpeng Guo","raw_affiliation_strings":["The QA Company SAS, France and Laboratoire Hubert Curien, UMR CNRS 5516, Universit\u00e9 Jean Monnet, France"],"affiliations":[{"raw_affiliation_string":"The QA Company SAS, France and Laboratoire Hubert Curien, UMR CNRS 5516, Universit\u00e9 Jean Monnet, France","institution_ids":["https://openalex.org/I1294671590","https://openalex.org/I4210085887","https://openalex.org/I86767153"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073664557","display_name":"Dennis Diefenbach","orcid":"https://orcid.org/0000-0002-0046-2219"},"institutions":[{"id":"https://openalex.org/I4210085887","display_name":"Laboratoire Hubert Curien","ror":"https://ror.org/0028p8r67","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I203339264","https://openalex.org/I277688954","https://openalex.org/I4210085887","https://openalex.org/I4210091746","https://openalex.org/I4210095849","https://openalex.org/I59692284","https://openalex.org/I86767153"]},{"id":"https://openalex.org/I86767153","display_name":"Universit\u00e9 Jean Monnet","ror":"https://ror.org/04yznqr36","country_code":"FR","type":"education","lineage":["https://openalex.org/I203339264","https://openalex.org/I86767153"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Dennis Diefenbach","raw_affiliation_strings":["The QA Company SAS, France and Laboratoire Hubert Curien UMR 5516, Universit\u00e9 Jean Monnet, France"],"affiliations":[{"raw_affiliation_string":"The QA Company SAS, France and Laboratoire Hubert Curien UMR 5516, Universit\u00e9 Jean Monnet, France","institution_ids":["https://openalex.org/I4210085887","https://openalex.org/I86767153"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047732112","display_name":"Antoine Gourru","orcid":"https://orcid.org/0000-0003-3571-2430"},"institutions":[{"id":"https://openalex.org/I4210085887","display_name":"Laboratoire Hubert Curien","ror":"https://ror.org/0028p8r67","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I203339264","https://openalex.org/I277688954","https://openalex.org/I4210085887","https://openalex.org/I4210091746","https://openalex.org/I4210095849","https://openalex.org/I59692284","https://openalex.org/I86767153"]},{"id":"https://openalex.org/I86767153","display_name":"Universit\u00e9 Jean Monnet","ror":"https://ror.org/04yznqr36","country_code":"FR","type":"education","lineage":["https://openalex.org/I203339264","https://openalex.org/I86767153"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Antoine Gourru","raw_affiliation_strings":["Laboratoire Hubert Curien UMR 5516, Universit\u00e9 Jean Monnet, France"],"affiliations":[{"raw_affiliation_string":"Laboratoire Hubert Curien UMR 5516, Universit\u00e9 Jean Monnet, France","institution_ids":["https://openalex.org/I4210085887","https://openalex.org/I86767153"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082775286","display_name":"Christophe Gravier","orcid":"https://orcid.org/0000-0001-8586-6302"},"institutions":[{"id":"https://openalex.org/I4210085887","display_name":"Laboratoire Hubert Curien","ror":"https://ror.org/0028p8r67","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I203339264","https://openalex.org/I277688954","https://openalex.org/I4210085887","https://openalex.org/I4210091746","https://openalex.org/I4210095849","https://openalex.org/I59692284","https://openalex.org/I86767153"]},{"id":"https://openalex.org/I86767153","display_name":"Universit\u00e9 Jean Monnet","ror":"https://ror.org/04yznqr36","country_code":"FR","type":"education","lineage":["https://openalex.org/I203339264","https://openalex.org/I86767153"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Christophe Gravier","raw_affiliation_strings":["Laboratoire Hubert Curien UMR 5516, Universit\u00e9 Jean Monnet, France"],"affiliations":[{"raw_affiliation_string":"Laboratoire Hubert Curien UMR 5516, Universit\u00e9 Jean Monnet, France","institution_ids":["https://openalex.org/I4210085887","https://openalex.org/I86767153"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.361,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.434653,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":66,"max":76},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9984,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9984,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9969,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9967,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/knowledge-graph","display_name":"Knowledge graph","score":0.45621338}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8668602},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5632541},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5364846},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.531908},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5182612},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.4831652},{"id":"https://openalex.org/C2987255567","wikidata":"https://www.wikidata.org/wiki/Q33002955","display_name":"Knowledge graph","level":2,"score":0.45621338},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.43160337},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3543507.3583236","pdf_url":null,"source":{"id":"https://openalex.org/S4363608783","display_name":"Proceedings of the ACM Web Conference 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://hal.science/hal-04428911","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.07812","pdf_url":"http://arxiv.org/pdf/2401.07812","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.07812","pdf_url":"http://arxiv.org/pdf/2401.07812","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.71,"display_name":"Quality education"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":10,"referenced_works":["https://openalex.org/W102708294","https://openalex.org/W2000411838","https://openalex.org/W2016753842","https://openalex.org/W2080133951","https://openalex.org/W2094728533","https://openalex.org/W2104583100","https://openalex.org/W2946693801","https://openalex.org/W2964072618","https://openalex.org/W3091620232","https://openalex.org/W4285820265"],"related_works":["https://openalex.org/W4385731361","https://openalex.org/W4385489363","https://openalex.org/W4383535523","https://openalex.org/W4220741973","https://openalex.org/W36911888","https://openalex.org/W3135843367","https://openalex.org/W3112355890","https://openalex.org/W3006227201","https://openalex.org/W2613685774","https://openalex.org/W2611741382"],"abstract_inverted_index":{"Wikidata":[0,131,194,225,288],"has":[1],"grown":[2],"to":[3,109,148,157,174,232,274,283],"a":[4,76,104,215,230,245],"knowledge":[5,77,289],"graph":[6,78],"with":[7],"an":[8],"impressive":[9],"size.":[10],"To":[11],"date,":[12],"it":[13],"contains":[14],"more":[15,60],"than":[16],"17":[17],"billion":[18],"triples":[19],"collecting":[20],"information":[21,39,190],"about":[22],"people,":[23],"places,":[24],"films,":[25],"stars,":[26],"publications,":[27],"proteins,":[28],"and":[29,57,67,71,88,97,111,154,209,220,281],"many":[30],"more.":[31],"On":[32],"the":[33,38,41,91,189,195,202,236,284,287],"other":[34],"side,":[35],"most":[36],"of":[37,90,218,248,262,286],"on":[40,137,235],"Web":[42,120,161,185],"is":[43,79,107,135,273],"not":[44,177],"published":[45,117],"in":[46,62,75,193,277],"highly":[47],"structured":[48],"data":[49,74,92],"repositories":[50],"like":[51],"Wikidata,":[52],"but":[53,182],"rather":[54],"as":[55,229],"unstructured":[56],"semi-structured":[58],"content,":[59],"concretely":[61],"HTML":[63],"pages":[64],"containing":[65],"text":[66],"tables.":[68],"Finding,":[69],"monitoring,":[70],"organizing":[72],"this":[73,94,100,223],"requiring":[80],"considerable":[81],"work":[82],"from":[83,143,151,160,179,184],"human":[84,269],"editors.":[85,132],"The":[86,133,271],"volume":[87],"complexity":[89],"make":[93],"task":[95],"difficult":[96],"time-consuming.":[98],"In":[99],"work,":[101],"we":[102,166,242,257],"present":[103],"framework":[105,134,197],"that":[106,115,123,145,168,241,256,264],"able":[108],"identify":[110],"extract":[112,149,158,175,211,233,260],"new":[113,212],"facts":[114,150,159,176,213,234,263],"are":[116,146],"under":[118],"multiple":[119],"domains":[121],"so":[122],"they":[124],"can":[125,171,198,210,226,243,258,265],"be":[126,172,199,227,266],"proposed":[127,196,267],"for":[128,204,214,268],"validation":[129],"by":[130],"relying":[136],"question-answering":[138],"technologies.":[139],"We":[140],"take":[141],"inspiration":[142],"ideas":[144],"used":[147,228],"textual":[152,180],"collections":[153,181],"adapt":[155],"them":[156],"pages.":[162,186],"For":[163],"achieving":[164],"this,":[165],"demonstrate":[167],"language":[169],"models":[170],"adapted":[173],"only":[178],"also":[183],"By":[187],"exploiting":[188],"already":[191],"contained":[192],"trained":[200],"without":[201],"need":[203],"any":[205],"additional":[206],"learning":[207],"signals":[208],"wide":[216],"range":[217],"properties":[219],"domains.":[221],"Following":[222],"path,":[224],"seed":[231],"Web.":[237],"Our":[238],"experiments":[239],"show":[240,255],"achieve":[244],"mean":[246],"performance":[247],"84.07":[249],"at":[250],"F1-score.":[251],"Moreover,":[252],"our":[253],"estimations":[254],"potentially":[259],"millions":[261],"validation.":[270],"goal":[272],"help":[275],"editors":[276],"their":[278],"daily":[279],"tasks":[280],"contribute":[282],"completion":[285],"graph.":[290]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4367046714","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-04-06T11:33:40.131548","created_date":"2023-04-27"}