{"id":"https://openalex.org/W4396624292","doi":"https://doi.org/10.48550/arxiv.2405.00505","title":"KVP10k : A Comprehensive Dataset for Key-Value Pair Extraction in\n Business Documents","display_name":"KVP10k : A Comprehensive Dataset for Key-Value Pair Extraction in\n Business Documents","publication_year":2024,"publication_date":"2024-05-01","ids":{"openalex":"https://openalex.org/W4396624292","doi":"https://doi.org/10.48550/arxiv.2405.00505"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2405.00505","pdf_url":"https://arxiv.org/pdf/2405.00505","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2405.00505","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020146315","display_name":"Oshri Naparstek","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Naparstek, Oshri","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5096068527","display_name":"Roi Pony","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pony, Roi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071212365","display_name":"Inbar Shapira","orcid":"https://orcid.org/0000-0002-8016-1749"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shapira, Inbar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5096130418","display_name":"Foad Abo Dahood","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dahood, Foad Abo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005382374","display_name":"Ophir Azulai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Azulai, Ophir","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043633934","display_name":"Yevgeny Yaroker","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yaroker, Yevgeny","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5096068528","display_name":"Nadav Rubinstein","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rubinstein, Nadav","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024178356","display_name":"Maksym Lysak","orcid":"https://orcid.org/0000-0002-3723-6960"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lysak, Maksym","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024778597","display_name":"Peter Staar","orcid":"https://orcid.org/0000-0002-8088-0823"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Staar, Peter","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088090169","display_name":"Ahmed Nassar","orcid":"https://orcid.org/0000-0001-8047-0069"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nassar, Ahmed","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084933934","display_name":"Nikolaos Livathinos","orcid":"https://orcid.org/0000-0001-8513-3491"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Livathinos, Nikolaos","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046722997","display_name":"Christoph Auer","orcid":"https://orcid.org/0000-0001-5761-0422"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Auer, Christoph","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057762043","display_name":"Elad Amrani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Amrani, Elad","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111188053","display_name":"I Friedman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Friedman, Idan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5096148459","display_name":"Orit Prince","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Prince, Orit","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047695006","display_name":"Yevgeny Burshtein","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Burshtein, Yevgeny","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069713654","display_name":"Adi Raz Goldfarb","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goldfarb, Adi Raz","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5047102702","display_name":"Udi Barzelay","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Barzelay, Udi","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9606,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9606,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.546564}],"concepts":[{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.79029334},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.546564},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.45347434},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.40296695},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.368944},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.069206744},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.06196025}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2405.00505","pdf_url":"https://arxiv.org/pdf/2405.00505","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2405.00505","pdf_url":"https://arxiv.org/pdf/2405.00505","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4395014643","https://openalex.org/W4391913857","https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2382290278","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2350741829","https://openalex.org/W2001405890"],"abstract_inverted_index":{"In":[0,143],"recent":[1],"years,":[2],"the":[3,37,55,106,181,186],"challenge":[4],"of":[5,67,92,108,156,188],"extracting":[6,60],"information":[7,61,189],"from":[8,28,191],"business":[9,193],"documents":[10],"has":[11,24],"emerged":[12],"as":[13,158,160],"a":[14,63,126,149,163],"critical":[15],"task,":[16],"finding":[17],"applications":[18],"across":[19],"numerous":[20],"domains.":[21],"This":[22,98],"effort":[23],"attracted":[25],"substantial":[26],"interest":[27],"both":[29],"industry":[30],"and":[31,73,95,111,129,176],"academy,":[32],"highlighting":[33],"its":[34,171],"significance":[35],"in":[36,43,162,174,185],"current":[38],"technological":[39],"landscape.":[40],"Most":[41],"datasets":[42,72,110],"this":[44,120],"area":[45],"are":[46],"primarily":[47,103],"focused":[48],"on":[49,78,85],"Key":[50],"Information":[51],"Extraction":[52],"(KIE),":[53],"where":[54],"extraction":[56,190],"process":[57],"revolves":[58],"around":[59],"using":[62],"specific,":[64],"predefined":[65,86],"set":[66],"keys.":[68],"Unlike":[69],"most":[70],"existing":[71],"benchmarks,":[74],"our":[75,144],"focus":[76],"is":[77],"discovering":[79],"key-value":[80],"pairs":[81],"(KVPs)":[82],"without":[83],"relying":[84],"keys,":[87],"navigating":[88],"through":[89],"an":[90],"array":[91],"diverse":[93],"templates":[94],"complex":[96,192],"layouts.":[97],"task":[99,152],"presents":[100],"unique":[101],"challenges,":[102],"due":[104],"to":[105],"absence":[107],"comprehensive":[109],"benchmarks":[112],"tailored":[113],"for":[114,133,183],"non-predetermined":[115],"KVP":[116,134,161],"extraction.":[117,135],"To":[118],"address":[119],"gap,":[121],"we":[122,146],"introduce":[123,148],"KVP10k":[124,166],",":[125],"new":[127,150],"dataset":[128,137],"benchmark":[130],"specifically":[131],"designed":[132],"The":[136],"contains":[138],"10707":[139],"richly":[140,177],"annotated":[141],"images.":[142],"benchmark,":[145],"also":[147],"challenging":[151],"that":[153],"combines":[154],"elements":[155],"KIE":[157],"well":[159],"single":[164],"task.":[165],"sets":[167],"itself":[168],"apart":[169],"with":[170],"extensive":[172],"diversity":[173],"data":[175],"detailed":[178],"annotations,":[179],"paving":[180],"way":[182],"advancements":[184],"field":[187],"documents.":[194]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4396624292","counts_by_year":[],"updated_date":"2024-12-11T00:12:03.290896","created_date":"2024-05-04"}