Trie树 不解析, 本园很多博文有提到。
直接上代码:
#coding:utf-8
'''
create on 2013-07-30
@author :HuangYanQiang
'''
LETTER_NUM=27;#组成单词的字母个数,26个字母+'-'
#Trie 结构体
class Node:
def __init__(self, is_word=False):
global LETTER_NUM;
self.is_word = is_word;#是不是单词结束节点
self.prefix_count = 0;#这个前缀的单词个数
self.children = [None for child in range(LETTER_NUM)];
#Trie 结构体
class Trie:
def __init__(self):
self.head = Node();
###插入新单词
def insert(self, word):
current = self.head;
count = 0 ;
for letter in word:
if (letter == '-'):
int_letter=LETTER_NUM-1;
else:
int_letter = ord(letter)-ord('a');
if(current.children[int_letter] is None):
current.children[int_letter] = Node();
current = current.children[int_letter];
count += 1;
current.prefix_count = count;
else:
current = current.children[int_letter];
current.prefix_count += 1;
current.is_word = True;
###查询单词是否存在
def search(self, word):
current = self.head;
int_letter = 0;
for letter in word:
if (letter == '-'):
int_letter=LETTER_NUM-1;
else:
int_letter = ord(letter)-ord('a');
if (current.children[int_letter] is None):
#print "int_letter = " + str(int_letter);
return False;
else:
current = current.children[int_letter];
return current.is_word;
###根据字母前缀输出所有的单词
def output(self,strPrefix):
if(strPrefix is None or strPrefix == ""):
print ("please tell me prefix letter.");
currentNode = self.head;
int_letter = 0;
for letter in strPrefix:
if (letter == '-'):
int_letter=LETTER_NUM-1;
else:
int_letter = ord(letter)-ord('a');
currentNode = currentNode.children[int_letter];
if(currentNode is not None):
if(currentNode.is_word):
print (strPrefix+"; ");
else:
return;
for i in range(LETTER_NUM):
if(currentNode.children[i] is not None):
self.output(strPrefix+chr(i+ord('a')));
#################
###读取单词列表文本构造Trie结构
class BuildTrie:
def __init__(self):
self.trie = Trie();
for line in file("EnglishDict.txt"):
line = line.lower();#全部换成小写
line = line.replace('\r','').replace('\n','');#去掉结束符
isword = True;
int_letter = 0;
str_letter="abcdefghijklmnopqrstuvwxyz-ABCDEFGHIJKLMNOPQRSTUVWXYZ"
for letter in line:
if(letter not in str_letter ):
isword = False;
break;
if(isword == False):
print (line + ", it is not a word");
continue;
else:
self.trie.insert(line);
if __name__=="__main__":
import doctest
doctest.testmod();
# t = Trie();
# t.insert("apple");
# t.insert("abc");
# t.insert("abandon");
# t.insert("bride");
# t.insert("bridegroom");
# t.insert("good");
# t.output("b");
bt = BuildTrie();
t = bt.trie
t.output("z");
print t.search("apple");
print t.search("fff");
print t.search("good");
print("a num:"+str(t.head.children[0].prefix_count));
print("ab num:"+str(t.head.children[0].children[1].prefix_count));
print("b num:"+str(t.head.children[1].prefix_count));