{"id":"https://openalex.org/W1998729616","doi":"https://doi.org/10.1145/2595188.2595195","title":"Automatic article extraction in old newspapers digitized collections","display_name":"Automatic article extraction in old newspapers digitized collections","publication_year":2014,"publication_date":"2014-05-19","ids":{"openalex":"https://openalex.org/W1998729616","doi":"https://doi.org/10.1145/2595188.2595195","mag":"1998729616"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/2595188.2595195","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075258341","display_name":"David G. Hebert","orcid":"https://orcid.org/0000-0003-3926-5228"},"institutions":[{"id":"https://openalex.org/I62396329","display_name":"Universit\u00e9 de Rouen Normandie","ror":"https://ror.org/03nhjew95","country_code":"FR","type":"funder","lineage":["https://openalex.org/I4210105918","https://openalex.org/I62396329"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"David Hebert","raw_affiliation_strings":["LITIS, University of Rouen, Rouvray#TAB#"],"affiliations":[{"raw_affiliation_string":"LITIS, University of Rouen, Rouvray#TAB#","institution_ids":["https://openalex.org/I62396329"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041298953","display_name":"Thomas Palfray","orcid":null},"institutions":[{"id":"https://openalex.org/I62396329","display_name":"Universit\u00e9 de Rouen Normandie","ror":"https://ror.org/03nhjew95","country_code":"FR","type":"funder","lineage":["https://openalex.org/I4210105918","https://openalex.org/I62396329"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Thomas Palfray","raw_affiliation_strings":["LITIS, University of Rouen, Rouvray#TAB#"],"affiliations":[{"raw_affiliation_string":"LITIS, University of Rouen, Rouvray#TAB#","institution_ids":["https://openalex.org/I62396329"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059213449","display_name":"St\u00e9phane Nicolas","orcid":"https://orcid.org/0000-0003-0758-9930"},"institutions":[{"id":"https://openalex.org/I62396329","display_name":"Universit\u00e9 de Rouen Normandie","ror":"https://ror.org/03nhjew95","country_code":"FR","type":"funder","lineage":["https://openalex.org/I4210105918","https://openalex.org/I62396329"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Stephane Nicolas","raw_affiliation_strings":["LITIS, University of Rouen, Rouvray#TAB#"],"affiliations":[{"raw_affiliation_string":"LITIS, University of Rouen, Rouvray#TAB#","institution_ids":["https://openalex.org/I62396329"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112340423","display_name":"Pierrick Tranouez","orcid":"https://orcid.org/0000-0002-1962-0782"},"institutions":[{"id":"https://openalex.org/I62396329","display_name":"Universit\u00e9 de Rouen Normandie","ror":"https://ror.org/03nhjew95","country_code":"FR","type":"funder","lineage":["https://openalex.org/I4210105918","https://openalex.org/I62396329"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Pierrick Tranouez","raw_affiliation_strings":["LITIS, University of Rouen, Rouvray#TAB#"],"affiliations":[{"raw_affiliation_string":"LITIS, University of Rouen, Rouvray#TAB#","institution_ids":["https://openalex.org/I62396329"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022490769","display_name":"Thierry Paquet","orcid":"https://orcid.org/0000-0002-2044-7542"},"institutions":[{"id":"https://openalex.org/I62396329","display_name":"Universit\u00e9 de Rouen Normandie","ror":"https://ror.org/03nhjew95","country_code":"FR","type":"funder","lineage":["https://openalex.org/I4210105918","https://openalex.org/I62396329"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Thierry Paquet","raw_affiliation_strings":["LITIS, University of Rouen, Rouvray#TAB#"],"affiliations":[{"raw_affiliation_string":"LITIS, University of Rouen, Rouvray#TAB#","institution_ids":["https://openalex.org/I62396329"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.967,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":22,"citation_normalized_percentile":{"value":0.920819,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":92},"biblio":{"volume":null,"issue":null,"first_page":"3","last_page":"8"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9991,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/document-layout-analysis","display_name":"Document layout analysis","score":0.6490131},{"id":"https://openalex.org/keywords/representation","display_name":"Representation","score":0.5625107}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.80693746},{"id":"https://openalex.org/C152565575","wikidata":"https://www.wikidata.org/wiki/Q1124538","display_name":"Conditional random field","level":2,"score":0.68466645},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.6618247},{"id":"https://openalex.org/C72773152","wikidata":"https://www.wikidata.org/wiki/Q5287629","display_name":"Document layout analysis","level":3,"score":0.6490131},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.59609383},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.59184325},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.588987},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5849076},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5625107},{"id":"https://openalex.org/C201280247","wikidata":"https://www.wikidata.org/wiki/Q11032","display_name":"Newspaper","level":2,"score":0.51193035},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.48415214},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.44613487},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.43284237},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39301482},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3595227},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.31460118},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.15218437},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C112698675","wikidata":"https://www.wikidata.org/wiki/Q37038","display_name":"Advertising","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/2595188.2595195","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.88,"display_name":"Quality education"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":14,"referenced_works":["https://openalex.org/W1491852184","https://openalex.org/W1508642176","https://openalex.org/W1594459094","https://openalex.org/W1763905908","https://openalex.org/W2052301383","https://openalex.org/W2090985514","https://openalex.org/W2095844239","https://openalex.org/W2096210305","https://openalex.org/W2145959310","https://openalex.org/W2147880316","https://openalex.org/W2153166076","https://openalex.org/W2162031011","https://openalex.org/W2551151849","https://openalex.org/W2734798343"],"related_works":["https://openalex.org/W50079190","https://openalex.org/W3102147106","https://openalex.org/W2356597680","https://openalex.org/W2347460059","https://openalex.org/W2114846443","https://openalex.org/W2111726165","https://openalex.org/W2100125468","https://openalex.org/W2093471820","https://openalex.org/W2045514505","https://openalex.org/W1984858032"],"abstract_inverted_index":{"We":[0],"present":[1],"a":[2,59,67,71,87,103,108,161,165,174,185],"complete":[3],"method":[4,156],"for":[5,146],"article":[6,113,128],"segmentation":[7,114],"in":[8,33,36,47,66,76,102],"old":[9],"newspapers,":[10],"which":[11,157],"deals":[12],"with":[13,70,86,149],"complex":[14,147],"layouts":[15],"analysis":[16,51,121,163,178],"of":[17,27,45,52,84,96,126,141,187],"degraded":[18],"documents.":[19],"The":[20,50],"designed":[21],"workflow":[22],"can":[23],"process":[24],"large":[25],"amounts":[26],"documents":[28],"and":[29,42,115,152,173,197],"generates":[30],"digital":[31,48],"objects":[32],"METS/ALTO":[34],"format":[35],"order":[37,77],"to":[38,78,80,106,201],"facilitate":[39],"the":[40,43,53,82,97,124,134],"indexing":[41],"browsing":[44],"information":[46],"libraries.":[49],"document":[54,98,135,191],"image":[55],"is":[56,100,182],"performed":[57],"by":[58],"two":[60],"stages":[61],"scheme.":[62],"Pixels":[63],"are":[64],"labeled":[65],"first":[68,93],"stage":[69,105],"Conditional":[72],"Random":[73],"Field":[74],"model":[75,167],"intent":[79],"label":[81],"areas":[83],"interest":[85],"low":[88],"logical":[89,94,110],"level.":[90],"Then":[91],"this":[92],"representation":[95,111],"content":[99],"analyzed":[101],"second":[104],"get":[107],"higher":[109],"including":[112],"reading":[116],"order.":[117],"This":[118,155],"top-level":[119],"structural":[120,177],"relies":[122],"on":[123,133,184],"generation":[125],"an":[127],"separation":[129],"grid":[130],"applied":[131],"recursively":[132],"image,":[136],"allowing":[137],"analyzing":[138],"any":[139],"type":[140],"Manhattan":[142],"page":[143,199],"layout,":[144],"even":[145],"structures":[148],"multiple":[150],"columns":[151],"overlapping":[153],"entities.":[154],"benefits":[158],"from":[159],"both":[160],"local":[162,189],"using":[164,169,179],"probabilistic":[166],"trained":[168],"machine":[170],"learning":[171],"procedures,":[172],"more":[175],"global":[176],"recursive":[180],"rules,":[181],"evaluated":[183],"dataset":[186],"daily":[188],"press":[190],"images":[192],"covering":[193],"several":[194],"time":[195],"periods":[196],"different":[198],"layouts,":[200],"prove":[202],"its":[203],"effectiveness.":[204]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W1998729616","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1},{"year":2014,"cited_by_count":1}],"updated_date":"2025-03-18T10:10:39.164191","created_date":"2016-06-24"}