{"id":"https://openalex.org/W2117084652","doi":"https://doi.org/10.1162/0891201042544938","title":"Fast Approximate Search in Large Dictionaries","display_name":"Fast Approximate Search in Large Dictionaries","publication_year":2004,"publication_date":"2004-11-25","ids":{"openalex":"https://openalex.org/W2117084652","doi":"https://doi.org/10.1162/0891201042544938","mag":"2117084652"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1162/0891201042544938","pdf_url":"http://www.mitpressjournals.org/doi/pdf/10.1162/0891201042544938","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"http://www.mitpressjournals.org/doi/pdf/10.1162/0891201042544938","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114255303","display_name":"Stoyan Mihov","orcid":null},"institutions":[{"id":"https://openalex.org/I4210134980","display_name":"Institute for Parallel Processing","ror":"https://ror.org/047hhy227","country_code":"BG","type":"facility","lineage":["https://openalex.org/I24768866","https://openalex.org/I4210134980"]},{"id":"https://openalex.org/I24768866","display_name":"Bulgarian Academy of Sciences","ror":"https://ror.org/01x8hew03","country_code":"BG","type":"government","lineage":["https://openalex.org/I24768866"]}],"countries":["BG"],"is_corresponding":false,"raw_author_name":"Stoyan Mihov","raw_affiliation_strings":["Bulgarian Academy of Sciences, Linguistic Modelling Department, Institute for Parallel Processing, Bulgarian Academy of Sciences, 25A, Akad. G. Bonchev Str., 1113 Sofia, Bulgaria. E-mail:"],"affiliations":[{"raw_affiliation_string":"Bulgarian Academy of Sciences, Linguistic Modelling Department, Institute for Parallel Processing, Bulgarian Academy of Sciences, 25A, Akad. G. Bonchev Str., 1113 Sofia, Bulgaria. E-mail:","institution_ids":["https://openalex.org/I4210134980","https://openalex.org/I24768866"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109699155","display_name":"Klaus U. Schulz","orcid":null},"institutions":[{"id":"https://openalex.org/I8204097","display_name":"Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen","ror":"https://ror.org/05591te55","country_code":"DE","type":"education","lineage":["https://openalex.org/I8204097"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Klaus U. Schulz","raw_affiliation_strings":["University of Munich, Centrum f\u00fcr Informations-und Sprachverarbeitung, Ludwig-Maximilians-Universit\u00e4t-M\u00fcnchen, Oettingenstr. 67, 80538 Munchen, Germany."],"affiliations":[{"raw_affiliation_string":"University of Munich, Centrum f\u00fcr Informations-und Sprachverarbeitung, Ludwig-Maximilians-Universit\u00e4t-M\u00fcnchen, Oettingenstr. 67, 80538 Munchen, Germany.","institution_ids":["https://openalex.org/I8204097"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":0,"currency":"USD","value_usd":0,"provenance":"doaj"},"apc_paid":{"value":0,"currency":"USD","value_usd":0,"provenance":"doaj"},"fwci":4.902,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":67,"citation_normalized_percentile":{"value":0.963979,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":95},"biblio":{"volume":"30","issue":"4","first_page":"451","last_page":"477"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11567","display_name":"semigroups and automata theory","score":0.9964,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/levenshtein-distance","display_name":"Levenshtein distance","score":0.81211376},{"id":"https://openalex.org/keywords/edit-distance","display_name":"Edit distance","score":0.5727907}],"concepts":[{"id":"https://openalex.org/C2777515626","wikidata":"https://www.wikidata.org/wiki/Q496939","display_name":"Levenshtein distance","level":2,"score":0.81211376},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7853395},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.66933763},{"id":"https://openalex.org/C44359876","wikidata":"https://www.wikidata.org/wiki/Q5338467","display_name":"Edit distance","level":2,"score":0.5727907},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.5630509},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.48448035},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.45784277},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41399485},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19913},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1162/0891201042544938","pdf_url":"http://www.mitpressjournals.org/doi/pdf/10.1162/0891201042544938","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.67.6424","pdf_url":"http://www.cis.uni-muenchen.de/people/Schulz/Pub/fastapproxsearch.pdf","source":{"id":"https://openalex.org/S4306400349","display_name":"CiteSeer X (The Pennsylvania State University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I130769515","host_organization_name":"Pennsylvania State University","host_organization_lineage":["https://openalex.org/I130769515"],"host_organization_lineage_names":["Pennsylvania State University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1162/0891201042544938","pdf_url":"http://www.mitpressjournals.org/doi/pdf/10.1162/0891201042544938","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Quality education","id":"https://metadata.un.org/sdg/4","score":0.83}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":48,"referenced_works":["https://openalex.org/W1487429615","https://openalex.org/W1515839227","https://openalex.org/W1521776429","https://openalex.org/W1529615245","https://openalex.org/W1548701969","https://openalex.org/W1563342424","https://openalex.org/W1647671624","https://openalex.org/W1650656906","https://openalex.org/W1902405276","https://openalex.org/W1969698078","https://openalex.org/W1970026646","https://openalex.org/W1997204547","https://openalex.org/W1999378673","https://openalex.org/W2001496424","https://openalex.org/W2002089154","https://openalex.org/W2010595692","https://openalex.org/W2011632873","https://openalex.org/W2012659300","https://openalex.org/W2016219933","https://openalex.org/W2019363722","https://openalex.org/W2020035298","https://openalex.org/W2020103094","https://openalex.org/W2023358833","https://openalex.org/W2023843553","https://openalex.org/W2040102554","https://openalex.org/W2041486018","https://openalex.org/W2042850423","https://openalex.org/W2043481183","https://openalex.org/W2045821558","https://openalex.org/W2055247965","https://openalex.org/W2055846397","https://openalex.org/W2057887529","https://openalex.org/W2062235741","https://openalex.org/W2065546971","https://openalex.org/W2066102695","https://openalex.org/W2074064717","https://openalex.org/W2093537029","https://openalex.org/W2099510384","https://openalex.org/W2105491669","https://openalex.org/W2110314280","https://openalex.org/W2154478838","https://openalex.org/W2165156013","https://openalex.org/W2168459716","https://openalex.org/W27881537","https://openalex.org/W2950053197","https://openalex.org/W4214671568","https://openalex.org/W4231741839","https://openalex.org/W82777215"],"related_works":["https://openalex.org/W4362583275","https://openalex.org/W4321609555","https://openalex.org/W4285090010","https://openalex.org/W4280559639","https://openalex.org/W2844405045","https://openalex.org/W2788104449","https://openalex.org/W2575897682","https://openalex.org/W2461708070","https://openalex.org/W2003932770","https://openalex.org/W1531307672"],"abstract_inverted_index":{"The":[0,116],"need":[1],"to":[2,50,107,129],"correct":[3],"garbled":[4],"strings":[5],"arises":[6],"in":[7,42,112],"many":[8],"areas":[9],"of":[10,28,39,85,100,137,155,167,176],"natural":[11,26],"language":[12],"processing.":[13],"If":[14],"a":[15,25,54,74,77,86,113],"dictionary":[16,44],"is":[17,36],"available":[18],"that":[19,144],"covers":[20],"all":[21,40],"possible":[22],"input":[23,34,138,162,178],"tokens,":[24],"set":[27,38],"candidates":[29],"for":[30,45,65,134,147,160],"correcting":[31],"an":[32,168],"erroneous":[33],"P":[35],"the":[37,43,47,83,98,109,152,165,174,177],"words":[41],"which":[46,119,158],"Levenshtein":[48,88],"distance":[49],"Pdoes":[51],"not":[52],"exceed":[53],"given":[55],"(small)":[56],"bound":[57],"k.":[58],"In":[59],"this":[60],"article":[61],"we":[62,90],"describe":[63],"methods":[64,95],"efficiently":[66],"selecting":[67],"such":[68],"candidate":[69],"sets.":[70],"After":[71],"introducing":[72],"as":[73],"starting":[75],"point":[76],"basic":[78,110],"correction":[79,132,145,156],"method":[80,171],"based":[81],"on":[82,151,173],"concept":[84],"\u201cuniversal":[87],"automaton,\u201d":[89],"show":[91],"how":[92],"two":[93],"filtering":[94,170],"known":[96],"from":[97],"field":[99],"approximate":[101],"text":[102],"search":[103],"can":[104],"be":[105],"used":[106],"improve":[108],"procedure":[111],"significant":[114],"way.":[115],"first":[117],"method,":[118],"uses":[120],"standard":[121],"dictionaries":[122,124],"plus":[123],"with":[125],"reversed":[126],"words,":[127],"leads":[128],"very":[130],"short":[131],"times":[133,146],"most":[135],"classes":[136],"strings.":[139],"Our":[140],"evaluation":[141],"results":[142],"demonstrate":[143],"fixed-distance":[148],"bounds":[149],"depend":[150],"expected":[153],"number":[154],"candidates,":[157],"decreases":[159],"longer":[161],"words.":[163,179],"Similarly":[164],"choice":[166],"optimal":[169],"depends":[172],"length":[175]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2117084652","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":6},{"year":2012,"cited_by_count":3}],"updated_date":"2025-01-16T18:37:09.349330","created_date":"2016-06-24"}