{"id":"https://openalex.org/W3176028514","doi":"https://doi.org/10.1109/jcdl52503.2021.00030","title":"ScanBank: A Benchmark Dataset for Figure Extraction from Scanned Electronic Theses and Dissertations","display_name":"ScanBank: A Benchmark Dataset for Figure Extraction from Scanned Electronic Theses and Dissertations","publication_year":2021,"publication_date":"2021-09-01","ids":{"openalex":"https://openalex.org/W3176028514","doi":"https://doi.org/10.1109/jcdl52503.2021.00030","mag":"3176028514"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/jcdl52503.2021.00030","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2106.15320","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080451138","display_name":"Sampanna Yashwant Kahu","orcid":"https://orcid.org/0000-0002-8522-2926"},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"funder","lineage":["https://openalex.org/I859038795"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sampanna Yashwant Kahu","raw_affiliation_strings":["Virginia Polytechnic Institute and State University,Department of Electrical and Computer Engineering,Blacksburg,Virginia,24061"],"affiliations":[{"raw_affiliation_string":"Virginia Polytechnic Institute and State University,Department of Electrical and Computer Engineering,Blacksburg,Virginia,24061","institution_ids":["https://openalex.org/I859038795"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062743697","display_name":"William A. Ingram","orcid":"https://orcid.org/0000-0002-8307-8844"},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"funder","lineage":["https://openalex.org/I859038795"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"William A Ingram","raw_affiliation_strings":["Virginia Polytechnic Institute and State University,Department of Computer Science,Blacksburg,Virginia,24061"],"affiliations":[{"raw_affiliation_string":"Virginia Polytechnic Institute and State University,Department of Computer Science,Blacksburg,Virginia,24061","institution_ids":["https://openalex.org/I859038795"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049148461","display_name":"Edward A. Fox","orcid":"https://orcid.org/0000-0003-1447-6870"},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"funder","lineage":["https://openalex.org/I859038795"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Edward A Fox","raw_affiliation_strings":["Virginia Polytechnic Institute and State University,Department of Computer Science,Blacksburg,Virginia,24061"],"affiliations":[{"raw_affiliation_string":"Virginia Polytechnic Institute and State University,Department of Computer Science,Blacksburg,Virginia,24061","institution_ids":["https://openalex.org/I859038795"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075242841","display_name":"Jian Wu","orcid":"https://orcid.org/0000-0003-0173-4463"},"institutions":[{"id":"https://openalex.org/I81365321","display_name":"Old Dominion University","ror":"https://ror.org/04zjtrb98","country_code":"US","type":"funder","lineage":["https://openalex.org/I81365321"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jian Wu","raw_affiliation_strings":["Old Dominion University,Department of Computer Science,Norfolk,Virginia,23529"],"affiliations":[{"raw_affiliation_string":"Old Dominion University,Department of Computer Science,Norfolk,Virginia,23529","institution_ids":["https://openalex.org/I81365321"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.999875,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":91},"biblio":{"volume":null,"issue":null,"first_page":"180","last_page":"191"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9966,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9942,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6889881}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7054072},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6889881},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.56392187},{"id":"https://openalex.org/C513874922","wikidata":"https://www.wikidata.org/wiki/Q212805","display_name":"Digital library","level":3,"score":0.53689814},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5051301},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5019796},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.49750093},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4911246},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.47261217},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34389347},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3371554},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.1488252},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.12457365},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.07802528},{"id":"https://openalex.org/C164913051","wikidata":"https://www.wikidata.org/wiki/Q482","display_name":"Poetry","level":2,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/jcdl52503.2021.00030","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2106.15320","pdf_url":"https://arxiv.org/pdf/2106.15320","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2106.15320","pdf_url":"https://arxiv.org/pdf/2106.15320","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"display_name":"Quality education","id":"https://metadata.un.org/sdg/4","score":0.61}],"grants":[{"funder":"https://openalex.org/F4320306122","funder_display_name":"Institute of Museum and Library Services","award_id":"LG-37-19-0078-198"}],"datasets":[],"versions":[],"referenced_works_count":38,"referenced_works":["https://openalex.org/W1603719052","https://openalex.org/W1861492603","https://openalex.org/W1991869139","https://openalex.org/W2042014073","https://openalex.org/W2179352600","https://openalex.org/W2194775991","https://openalex.org/W2222512263","https://openalex.org/W2416987009","https://openalex.org/W2577020101","https://openalex.org/W2775795276","https://openalex.org/W2788907956","https://openalex.org/W2795424778","https://openalex.org/W2803654587","https://openalex.org/W2919502278","https://openalex.org/W2942265585","https://openalex.org/W2950830858","https://openalex.org/W2962793481","https://openalex.org/W2963037989","https://openalex.org/W2963542991","https://openalex.org/W2972380778","https://openalex.org/W2981630388","https://openalex.org/W2990230185","https://openalex.org/W2991040477","https://openalex.org/W3001732141","https://openalex.org/W3018757597","https://openalex.org/W3022226252","https://openalex.org/W3030600301","https://openalex.org/W3032536818","https://openalex.org/W3032538329","https://openalex.org/W3042011474","https://openalex.org/W3098722327","https://openalex.org/W3101186801","https://openalex.org/W3101577715","https://openalex.org/W3113753692","https://openalex.org/W4244393294","https://openalex.org/W4287782095","https://openalex.org/W4295101875","https://openalex.org/W791527587"],"related_works":["https://openalex.org/W915438175","https://openalex.org/W4321353415","https://openalex.org/W4246352526","https://openalex.org/W4230315250","https://openalex.org/W2745001401","https://openalex.org/W2378211422","https://openalex.org/W2130974462","https://openalex.org/W2121910908","https://openalex.org/W2086519370","https://openalex.org/W2028665553"],"abstract_inverted_index":{"We":[0,175,198],"focus":[1],"on":[2,129,139,187,271],"electronic":[3],"theses":[4,52],"and":[5,12,24,33,47,53,76,97,193,200,261,277],"dissertations":[6,54],"(ETDs),":[7],"aiming":[8],"to":[9,30,58,61,163,179,189,229,235],"improve":[10],"access":[11],"expand":[13],"their":[14],"utility,":[15],"since":[16,48],"more":[17],"than":[18],"6":[19],"million":[20],"are":[21,45,233],"publicly":[22],"available,":[23],"they":[25,102,124,134],"constitute":[26],"an":[27],"important":[28,202],"corpus":[29,38],"aid":[31],"research":[32,203],"education":[34],"across":[35],"disciplines.":[36],"The":[37],"is":[39,120,132,253],"growing":[40],"as":[41,70,162],"new":[42,150],"born-digital":[43,100,140,230],"documents":[44,231],"included,":[46],"millions":[49],"of":[50,83,115,166,217,224,249],"older":[51],"have":[55,91,135],"been":[56,92,137],"converted":[57],"digital":[59],"form":[60],"be":[62],"disseminated":[63],"electronically":[64],"in":[65,85],"institutional":[66],"repositories.":[67],"In":[68],"ETDs,":[69],"with":[71,107],"other":[72],"scholarly":[73],"works,":[74],"figures":[75,96,170,192],"tables":[77,98,172,194],"can":[78],"communicate":[79],"a":[80,86,149,181,283],"large":[81],"amount":[82],"information":[84],"concise":[87],"way.":[88],"Although":[89],"methods":[90,209,281],"proposed":[93],"for":[94,210,222,240,259,264],"extracting":[95],"from":[99,195,213,243],"PDFs,":[101],"do":[103,125],"not":[104,126],"work":[105],"well":[106,128],"scanned":[108,130,155,196,214,244,265],"ETDs.":[109,197,266],"Considering":[110],"this":[111,144,177],"problem,":[112],"our":[113,250],"assessment":[114],"state-of-the-art":[116],"figure":[117,211,241,260],"extraction":[118,212,242,263],"systems":[119],"that":[121,133],"the":[122,164,167,220,247,254],"reason":[123],"function":[127],"PDFs":[131],"only":[136],"trained":[138,270],"documents.":[141,215,245],"To":[142,246],"address":[143],"limitation,":[145],"we":[146],"present":[147],"ScanBank,":[148,272],"dataset":[151,178,258],"containing":[152],"10":[153],"thousand":[154,169],"page":[156],"images,":[157],"manually":[158,256],"labeled":[159],"by":[160,282],"humans":[161],"presence":[165],"3.3":[168],"or":[171],"found":[173],"therein.":[174],"use":[176],"train":[180,236],"deep":[182],"neural":[183],"network":[184],"model":[185],"based":[186],"YOLOv5":[188],"accurately":[190],"extract":[191],"pose":[199],"answer":[201],"questions":[204],"aimed":[205],"at":[206],"finding":[207],"better":[208,238],"One":[216],"those":[218],"concerns":[219],"value":[221],"training,":[223],"data":[225],"augmentation":[226],"techniques":[227],"applied":[228],"which":[232],"used":[234],"models":[237],"suited":[239],"best":[248],"knowledge,":[251],"ScanBank":[252],"first":[255],"annotated":[257],"table":[262],"A":[267],"YOLOv5-based":[268],"model,":[269],"outperforms":[273],"existing":[274],"comparable":[275],"open-source":[276],"freely":[278],"available":[279],"baseline":[280],"considerable":[284],"margin.":[285]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3176028514","counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2025-02-16T11:16:57.242547","created_date":"2021-07-05"}