{"id":"https://openalex.org/W3088511089","doi":"https://doi.org/10.1145/3395027.3419595","title":"The Old Bailey and OCR","display_name":"The Old Bailey and OCR","publication_year":2020,"publication_date":"2020-09-23","ids":{"openalex":"https://openalex.org/W3088511089","doi":"https://doi.org/10.1145/3395027.3419595","mag":"3088511089"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3395027.3419595","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021234098","display_name":"William Ughetta","orcid":null},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"William Ughetta","raw_affiliation_strings":["Department of Computer Science, Princeton, New Jersey, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Princeton, New Jersey, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034540227","display_name":"Brian W. Kernighan","orcid":"https://orcid.org/0000-0003-0741-9085"},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brian W. Kernighan","raw_affiliation_strings":["Department of Computer Science, Princeton, New Jersey, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Princeton, New Jersey, USA","institution_ids":[]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.125,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.542617,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":76,"max":79},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9972,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9937,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.8138756},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.77121985},{"id":"https://openalex.org/keywords/upload","display_name":"Upload","score":0.6537616}],"concepts":[{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.8138756},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.77121985},{"id":"https://openalex.org/C71901391","wikidata":"https://www.wikidata.org/wiki/Q7126699","display_name":"Upload","level":2,"score":0.6537616},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.62035346},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.5163016},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44226244},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.40419737},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38361087},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.36955547},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.33951664},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1198757},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.11023989},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3395027.3419595","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":3,"referenced_works":["https://openalex.org/W1647671624","https://openalex.org/W2008118773","https://openalex.org/W2133485975"],"related_works":["https://openalex.org/W435179959","https://openalex.org/W4238897586","https://openalex.org/W2619091065","https://openalex.org/W2291782699","https://openalex.org/W2284465472","https://openalex.org/W2112883198","https://openalex.org/W2059640416","https://openalex.org/W2000169967","https://openalex.org/W1993948687","https://openalex.org/W1490753184"],"abstract_inverted_index":{"The":[0,35],"Proceedings":[1],"of":[2,9,14,51,82,118,168,201],"the":[3,71,80,90,115,161,165,169,173,182,189,198],"Old":[4,36,72,91],"Bailey":[5,37,73,92],"is":[6,38,74],"a":[7,26,202,207],"corpus":[8],"over":[10,157],"180,000":[11,158],"page":[12],"images":[13],"court":[15],"records":[16],"printed":[17],"from":[18],"April":[19,22],"1674":[20],"to":[21,65,103],"1913":[23],"and":[24,58,60,68,148,163,195,206],"presents":[25],"comprehensive":[27],"challenge":[28],"for":[29,42,96],"Optical":[30],"Character":[31],"Recognition":[32],"(OCR)":[33],"services.":[34],"an":[39],"ideal":[40],"benchmark":[41],"historical":[43,67,86],"document":[44],"OCR,":[45,162],"representing":[46],"more":[47,123],"than":[48,125],"two":[49],"centuries":[50],"variations":[52],"in":[53,113],"documents,":[54],"including":[55],"spellings,":[56],"formats,":[57],"printing":[59],"preservation":[61],"qualities.":[62],"In":[63],"addition":[64],"its":[66],"sociological":[69],"significance,":[70],"filled":[75],"with":[76,84],"imperfections":[77],"that":[78,179],"reflect":[79],"reality":[81],"coping":[83],"large-scale":[85],"data.":[87],"Most":[88],"importantly,":[89],"contains":[93],"human":[94,174],"transcriptions":[95],"each":[97],"page,":[98],"which":[99],"can":[100],"be":[101,122],"used":[102],"help":[104],"measure":[105],"OCR":[106,119,135,170],"accuracy.":[107],"Since":[108],"humans":[109],"do":[110],"make":[111],"mistakes":[112],"transcriptions,":[114],"relative":[116],"performance":[117],"services":[120],"will":[121],"informative":[124],"their":[126],"absolute":[127],"performance.":[128],"This":[129],"paper":[130],"compares":[131],"three":[132],"leading":[133],"commercial":[134],"cloud":[136],"services:":[137],"Amazon":[138],"Web":[139],"Services's":[140],"Textract":[141],"(AWS);":[142],"Microsoft":[143],"Azure's":[144],"Cognitive":[145],"Services":[146],"(Azure);":[147],"Google":[149],"Cloud":[150],"Platform's":[151],"Vision":[152],"(GCP).":[153],"Benchmarking":[154],"involved":[155],"downloading":[156],"images,":[159],"executing":[160],"measuring":[164],"error":[166,185,204],"rate":[167,205],"text":[171],"against":[172],"transcriptions.":[175],"Our":[176],"results":[177],"found":[178],"AWS":[180],"had":[181,188,197],"lowest":[183,190],"median":[184,191],"rate,":[186],"Azure":[187],"round":[192],"trip":[193],"time,":[194],"GCP":[196],"best":[199],"combination":[200],"low":[203,208],"duration.":[209]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3088511089","counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2025-03-19T02:59:49.964345","created_date":"2020-10-01"}