{"id":"https://openalex.org/W3170883728","doi":"https://doi.org/10.1145/3497842","title":"Can BERT Dig It? Named Entity Recognition for Information Retrieval in the Archaeology Domain","display_name":"Can BERT Dig It? Named Entity Recognition for Information Retrieval in the Archaeology Domain","publication_year":2022,"publication_date":"2022-02-18","ids":{"openalex":"https://openalex.org/W3170883728","doi":"https://doi.org/10.1145/3497842","mag":"3170883728"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3497842","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3497842","source":{"id":"https://openalex.org/S4210184050","display_name":"Journal on Computing and Cultural Heritage","issn_l":"1556-4711","issn":["1556-4711","1556-4673"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3497842","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067546540","display_name":"Alex Brandsen","orcid":"https://orcid.org/0000-0003-1623-1340"},"institutions":[{"id":"https://openalex.org/I121797337","display_name":"Leiden University","ror":"https://ror.org/027bh9e22","country_code":"NL","type":"education","lineage":["https://openalex.org/I121797337"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Alex Brandsen","raw_affiliation_strings":["Leiden University, Leiden, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Leiden University, Leiden, The Netherlands","institution_ids":["https://openalex.org/I121797337"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027124439","display_name":"Suzan Verberne","orcid":"https://orcid.org/0000-0002-9609-9505"},"institutions":[{"id":"https://openalex.org/I121797337","display_name":"Leiden University","ror":"https://ror.org/027bh9e22","country_code":"NL","type":"education","lineage":["https://openalex.org/I121797337"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Suzan Verberne","raw_affiliation_strings":["Leiden University, Leiden, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Leiden University, Leiden, The Netherlands","institution_ids":["https://openalex.org/I121797337"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044340396","display_name":"Karsten Lambers","orcid":"https://orcid.org/0000-0001-6432-0925"},"institutions":[{"id":"https://openalex.org/I121797337","display_name":"Leiden University","ror":"https://ror.org/027bh9e22","country_code":"NL","type":"education","lineage":["https://openalex.org/I121797337"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Karsten Lambers","raw_affiliation_strings":["Leiden University, Leiden, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Leiden University, Leiden, The Netherlands","institution_ids":["https://openalex.org/I121797337"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079151150","display_name":"M. Wansleeben","orcid":"https://orcid.org/0000-0001-6895-6058"},"institutions":[{"id":"https://openalex.org/I121797337","display_name":"Leiden University","ror":"https://ror.org/027bh9e22","country_code":"NL","type":"education","lineage":["https://openalex.org/I121797337"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Milco Wansleeben","raw_affiliation_strings":["Leiden University, Leiden, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Leiden University, Leiden, The Netherlands","institution_ids":["https://openalex.org/I121797337"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":5.35,"has_fulltext":false,"cited_by_count":54,"citation_normalized_percentile":{"value":0.99997,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"15","issue":"3","first_page":"1","last_page":"18"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.994,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named Entity Recognition","score":0.5829402},{"id":"https://openalex.org/keywords/clef","display_name":"Clef","score":0.5697166},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.48452276}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7844184},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.67225647},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.5829402},{"id":"https://openalex.org/C107763842","wikidata":"https://www.wikidata.org/wiki/Q181040","display_name":"Clef","level":3,"score":0.5697166},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5291954},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5277553},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5168762},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5126042},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.49768403},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49012336},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.48452276},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.441068},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.43640763},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.18510172},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.13987723},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.13764334},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.077780455},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3497842","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3497842","source":{"id":"https://openalex.org/S4210184050","display_name":"Journal on Computing and Cultural Heritage","issn_l":"1556-4711","issn":["1556-4711","1556-4673"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://hdl.handle.net/1887/3464621","pdf_url":"https://scholarlypublications.universiteitleiden.nl/access/item%3A3464622/view","source":{"id":"https://openalex.org/S4306401843","display_name":"Data Archiving and Networked Services (DANS)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1322597698","host_organization_name":"Royal Netherlands Academy of Arts and Sciences","host_organization_lineage":["https://openalex.org/I1322597698"],"host_organization_lineage_names":["Royal Netherlands Academy of Arts and Sciences"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2106.07742","pdf_url":"https://arxiv.org/pdf/2106.07742","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3497842","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3497842","source":{"id":"https://openalex.org/S4210184050","display_name":"Journal on Computing and Cultural Heritage","issn_l":"1556-4711","issn":["1556-4711","1556-4673"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.82,"id":"https://metadata.un.org/sdg/4","display_name":"Quality education"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":56,"referenced_works":["https://openalex.org/W155834710","https://openalex.org/W1614298861","https://openalex.org/W189367193","https://openalex.org/W1975915866","https://openalex.org/W1993692165","https://openalex.org/W2002379171","https://openalex.org/W2098700435","https://openalex.org/W2099244340","https://openalex.org/W2117772341","https://openalex.org/W2130678848","https://openalex.org/W2147880316","https://openalex.org/W2163167319","https://openalex.org/W2177726182","https://openalex.org/W218702423","https://openalex.org/W2251404238","https://openalex.org/W2489012668","https://openalex.org/W2502070859","https://openalex.org/W2565815462","https://openalex.org/W2614062905","https://openalex.org/W2614581385","https://openalex.org/W2896457183","https://openalex.org/W2911489562","https://openalex.org/W2913668833","https://openalex.org/W2914236322","https://openalex.org/W2921553604","https://openalex.org/W2921621939","https://openalex.org/W2946119234","https://openalex.org/W2946676565","https://openalex.org/W2948223045","https://openalex.org/W2963250244","https://openalex.org/W2963341956","https://openalex.org/W2963809228","https://openalex.org/W2970771982","https://openalex.org/W2976444281","https://openalex.org/W2979826702","https://openalex.org/W2984930866","https://openalex.org/W2987972786","https://openalex.org/W2990524204","https://openalex.org/W2992787485","https://openalex.org/W2995647371","https://openalex.org/W3023545062","https://openalex.org/W3023618320","https://openalex.org/W3031359449","https://openalex.org/W3035858791","https://openalex.org/W3038047279","https://openalex.org/W3044739286","https://openalex.org/W3091382176","https://openalex.org/W3098824823","https://openalex.org/W3104415840","https://openalex.org/W3105220303","https://openalex.org/W3112254312","https://openalex.org/W3114950584","https://openalex.org/W3183336871","https://openalex.org/W3212421086","https://openalex.org/W4242764912","https://openalex.org/W51649439"],"related_works":["https://openalex.org/W4387517132","https://openalex.org/W4232115401","https://openalex.org/W3138512925","https://openalex.org/W2970828463","https://openalex.org/W2917705549","https://openalex.org/W2629131222","https://openalex.org/W2296679031","https://openalex.org/W1839123017","https://openalex.org/W1592594506","https://openalex.org/W117735687"],"abstract_inverted_index":{"The":[0,159],"amount":[1],"of":[2,53,157,197,213,262],"archaeological":[3,27,34,66,86],"literature":[4],"is":[5],"growing":[6],"rapidly.":[7],"Until":[8],"recently,":[9],"these":[10],"data":[11,235],"were":[12],"only":[13],"accessible":[14],"through":[15],"metadata":[16],"search.":[17],"We":[18,88,109,133,185],"implemented":[19],"a":[20,25,46,54,75,100,105,126,146,223,243,266],"text":[21,28,226],"retrieval":[22],"engine":[23],"for":[24,114,215,222,249],"large":[26],"collection":[29,64,204],"(~658":[30],"million":[31],"words).":[32],"In":[33,69],"IR,":[35],"domain-specific":[36,234,257],"entities":[37],"such":[38,228],"as":[39,229],"locations,":[40],"time":[41],"periods":[42],"and":[43,93,104,119,141,172,187,195,205,255],"artefacts":[44],"play":[45],"central":[47],"role.":[48],"This":[49],"motivated":[50],"the":[51,62,90,121,139,166,177,182,190,193,198,202,211,237,253,260],"development":[52],"named":[55,67],"entity":[56],"recognition":[57],"(NER)":[58],"model":[59,82,103,124,143,160],"to":[60,99],"annotate":[61],"full":[63,203],"with":[65,125,145],"entities.":[68],"this":[70],"article,":[71],"we":[72],"present":[73],"ArcheoBERTje,":[74],"BERT":[76,117,123,199],"(Bidirectional":[77],"Encoder":[78],"Representations":[79],"from":[80,176,265],"Transformers)":[81],"pre-trained":[83],"on":[84,95,201,233,240],"Dutch":[85,107,142],"texts.":[87],"compare":[89],"model\u2019s":[91,238],"quality":[92,239],"output":[94,196],"an":[96,153],"NER":[97,241],"task":[98],"generic":[101,106],"multilingual":[102,140],"model.":[108],"also":[110,161],"investigate":[111],"ensemble":[112,163],"methods":[113,164],"combining":[115,120,165],"multiple":[116],"models,":[118],"best":[122],"domain":[127,174,227,263],"thesaurus":[128,178,267],"using":[129],"conditional":[130],"random":[131],"fields.":[132],"find":[134],"that":[135,221,256],"ArcheoBERTje":[136,170],"outperforms":[137,162],"both":[138],"significantly":[144],"smaller":[147],"standard":[148],"deviation":[149],"between":[150,192],"runs,":[151],"reaching":[152],"average":[154],"F1":[155,183],"score":[156],"0.735.":[158],"three":[167],"models.":[168],"Combining":[169],"predictions":[171],"explicit":[173],"knowledge":[175,264],"did":[179],"not":[180],"increase":[181],"score.":[184],"quantitatively":[186],"qualitatively":[188],"analyse":[189],"differences":[191],"vocabulary":[194],"models":[200],"provide":[206],"some":[207],"valuable":[208],"insights":[209],"in":[210,252],"effect":[212],"fine-tuning":[214],"specific":[216,225],"domains.":[217],"Our":[218],"results":[219],"indicate":[220],"highly":[224],"archaeology,":[230],"further":[231],"pre-training":[232,258],"increases":[236],"by":[242],"much":[244],"larger":[245],"margin":[246],"than":[247],"shown":[248],"other":[250],"domains":[251],"literature,":[254],"makes":[259],"addition":[261],"unnecessary.":[268]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3170883728","counts_by_year":[{"year":2024,"cited_by_count":12},{"year":2023,"cited_by_count":14},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":1}],"updated_date":"2024-12-10T07:45:27.491767","created_date":"2021-06-22"}