我一直在做搜索,分词用的是 IK。但 IK 只能按照它自带的字典去分词,不太满足我的场景,而且没办法给每个分词打上原始属性的标签。于是就想自己写一个字典树,用最长匹配规则取分词,再封装自己的标签属性。我也不知道这样做对不对,反正是写了一个,也不确定自己写的算不算字典树(自己封装的,哈哈)。自己是个小白,不知道里面有没有 bug,欢迎指正。我用 4000 万条数据(每条长度大概在 10 个字符以内)怼进去测试过,内存占用 16 GB 左右,没有报错,识别也正常。

字典树(Trie)是一种前缀搜索树,可以用于敏感字词匹配,也可以做智能提示。但词条长度不要太长,否则容易内存占用过大;可以使用双数组字典树(DAT,Double-Array Trie)解决内存占用问题,不过使用 DAT 需要提前排序。另外,它不是平衡树,可能出现某一条支链很庞大、而另一条支链只有几条数据的问题。

Java项目数据字典 java实现字典树_java

// Trie node data structure (the classes in this post spell it "Tire")
package com.tire;

import java.util.List;
import java.util.Map;

public class TireNode {
    public char c;
    public boolean hasNext;
    public TireNode pre;
    public TireNode next;
    public List<Object> pos;
    public List<Object> type;
    public List<Object> cityCode;
    public Map<Character, TireNode> subTire;

    public TireNode() {

    }

    public TireNode(char c, TireNode pre, TireNode next) {
        this.c = c;
        this.pre = pre;
        this.next = next;
    }

    public TireNode(TireNode node, Map<Character, TireNode> subTire) {
        node.subTire = subTire;
    }

    public TireNode(char c) {
        this(c, null, null);
    }

}

//操作方法
package com.tire;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

import com.tianditu.util.Util;

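/**
 * Dictionary tree built from {@link TireNode}s.
 * <p>
 * Rule: a node that has a {@code next} has no sub-map ({@code subTire}).
 * 
 * @author wangnanhui
 * 
 */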
public class TireTree {
	// Initial value of {@link #size}; an instance field, not a constant.
	public int DEFAULT_SIZE = 500;
	// Used as the initial capacity of {@link #dic} at construction time;
	// afterwards setSize() overwrites it with the number of entries in dic.
	public int size = DEFAULT_SIZE;
	// Root table: maps the first character of a word to the node chain
	// that holds the rest of that word.
	public Map<Character, TireNode> dic = new HashMap<Character, TireNode>(size);

	/**
	 * Stores one word and its attributes in the dictionary.
	 * <p>
	 * If no root exists yet for the first character, the whole word is stored
	 * as a fresh chain; otherwise the remaining characters are merged into
	 * the existing root. {@link #setSize()} is called afterwards.
	 * 
	 * @param chs
	 *            characters of the word; {@code chs[0]} is read
	 *            unconditionally, so the array must not be empty
	 * @param type
	 *            data type of the entry
	 * @param cityCode
	 *            administrative division code of the entry
	 * @param pos
	 *            position of the entry
	 */
	private void putDateToDic(char[] chs, Object type, Object cityCode,
			Object pos) {
		TireNode root = dic.get(chs[0]);
		if (root != null) {
			char[] rest = Util.copyChar(chs, 1, chs.length - 1);
			putExistToDic(rest, root, type, cityCode, pos);
		} else {
			putNoExistToDic(chs, type, cityCode, pos);
		}
		setSize();
	}

	/**
	 * Merges the remaining characters of a word into an existing root node.
	 * <p>
	 * The node may have a single successor, a sub-map, or no children at
	 * all; {@link #putData} handles all three cases, so this method simply
	 * delegates to it starting at index 0.
	 * 
	 * @param chs
	 *            characters still to be stored (the root's own character
	 *            already removed)
	 * @param subRoot
	 *            existing node for the first character of the word
	 * @param type
	 *            data type of the entry
	 * @param cityCode
	 *            administrative division code of the entry
	 * @param pos
	 *            position of the entry
	 */
	void putExistToDic(char[] chs, TireNode subRoot, Object type,
			Object cityCode, Object pos) {
		putData(chs, 0, subRoot, type, cityCode, pos);
	}

	/**
	 * Inserts {@code chs[i..]} below {@code node} and records the attributes
	 * on the node where the word ends.
	 * <p>
	 * The method walks down existing nodes for as long as characters match.
	 * At the first character that has no matching child it builds a new
	 * chain for the remaining characters and attaches it:
	 * <ul>
	 * <li>node has a single successor: the successor and the new chain are
	 * both moved into a freshly created sub-map and {@code next} is
	 * cleared;</li>
	 * <li>node already has a sub-map: the new chain is added to it;</li>
	 * <li>node has no children: the new chain becomes {@code next}.</li>
	 * </ul>
	 * {@code hasNext} is kept equal to {@code next != null} on every node
	 * this method modifies.
	 * 
	 * @param chs
	 *            characters of the (remaining) word
	 * @param i
	 *            index of the first character in {@code chs} still to be
	 *            stored; {@code i == chs.length} means the word ends at
	 *            {@code node}
	 * @param node
	 *            node below which the characters are inserted
	 * @param type
	 *            data type of the entry
	 * @param cityCode
	 *            administrative division code of the entry
	 * @param pos
	 *            position of the entry
	 */
	void putData(char[] chs, int i, TireNode node, Object type,
			Object cityCode, Object pos) {
		TireNode current = node;
		int index = i;

		while (index < chs.length) {
			char ch = chs[index];

			// Single-successor link matches: just step along it.
			if (current.next != null && current.next.c == ch) {
				current.hasNext = true;
				current = current.next;
				index++;
				continue;
			}

			// Otherwise look for a matching child in the sub-map.
			TireNode child = current.subTire == null ? null : current.subTire
					.get(ch);
			if (child != null) {
				current = child;
				index++;
				continue;
			}

			// No existing path: build the rest of the word as a new chain.
			// createNewTire records the attributes on the chain's last node.
			char[] rest = index == 0 ? chs : Util.copyChar(chs, index,
					chs.length - index);
			TireNode branch = createNewTire(rest, current, null, type,
					cityCode, pos);

			if (current.next != null) {
				// Split: a node with more than one child keeps them in the
				// map and no longer uses next.
				if (current.subTire == null) {
					current.subTire = new HashMap<Character, TireNode>();
				}
				TireNode oldNode = current.next;
				current.subTire.put(oldNode.c, oldNode);
				current.subTire.put(branch.c, branch);
				current.next = null;
				current.hasNext = false;
			} else if (current.subTire != null) {
				current.subTire.put(branch.c, branch);
			} else {
				current.next = branch;
				current.hasNext = true;
			}
			return;
		}

		// Every character matched an existing node: the word ends here, so
		// append the attributes to this node's lists.
		if (current.cityCode == null) {
			current.cityCode = new ArrayList<>();
		}
		if (current.type == null) {
			current.type = new ArrayList<>();
		}
		if (current.pos == null) {
			current.pos = new ArrayList<>();
		}
		current.pos.add(pos);
		current.cityCode.add(cityCode);
		current.type.add(type);
	}

	/**
	 * Stores a word whose first character has no root in {@link #dic} yet:
	 * builds a chain for the whole word and registers it under its first
	 * character.
	 * 
	 * @param chs
	 *            characters of the word
	 * @param type
	 *            data type of the entry
	 * @param cityCode
	 *            administrative division code of the entry
	 * @param pos
	 *            position of the entry
	 */
	void putNoExistToDic(char[] chs, Object type, Object cityCode, Object pos) {
		char first = chs[0];
		TireNode chainHead = createNewTire(chs, null, null, type, cityCode,
				pos);
		dic.put(first, chainHead);
	}

	/**
	 * Placeholder, not implemented: always returns {@code null}.
	 * <p>
	 * Design note from the author: if the dictionary contains "abc" and the
	 * word to store is "abcd", while the sub-map of "abc" already holds other
	 * words such as "abce" and "abcf", then either "d" is put directly into
	 * that sub-map, or the sub-map is taken, "abc" is discarded, the map is
	 * attached to a new "abc" and "d" is put into it.
	 * 
	 * @param ch
	 *            unused
	 * @param subNode
	 *            unused
	 * @return always {@code null}
	 */
	TireNode put(char ch, Map<Character, TireNode> subNode) {

		return null;
	}

	/**
	 * Placeholder for the regular insertion path; not implemented, always
	 * returns {@code null}.
	 * 
	 * @param chs
	 *            unused
	 * @param pre
	 *            unused
	 * @param next
	 *            unused
	 * @return always {@code null}
	 */
	TireNode put(char[] chs, TireNode pre, TireNode next) {

		return null;
	}

	/**
	 * Looks up the node reached by the given keyword.
	 * 
	 * @param matchWords
	 *            keyword to match; when {@code Util.nullValue} reports it as
	 *            empty the result is {@code null}
	 * @param useFull
	 *            whether a full match is required
	 * @return the node found by {@link #get(char[], boolean)}, or
	 *         {@code null}
	 */
	public TireNode get(String matchWords, boolean useFull) {
		return Util.nullValue(matchWords) ? null : get(
				matchWords.toCharArray(), useFull);
	}

	/**
	 * Looks up the node reached by the given characters, starting from the
	 * root table {@link #dic}.
	 * 
	 * @param chs
	 *            characters to match; {@code null} or empty yields
	 *            {@code null}
	 * @param useFull
	 *            whether a full match is required (only consulted when
	 *            {@code chs} has more than one character)
	 * @return {@code null} when no root exists for {@code chs[0]}; the root
	 *         itself when {@code chs} has exactly one character; otherwise
	 *         the result of continuing the search below the root
	 */
	TireNode get(char[] chs, boolean useFull) {
		// Guard the chs[0] access below; this overload is called directly
		// (e.g. from main) without the string-level empty check.
		if (chs == null || chs.length == 0) {
			return null;
		}
		TireNode root = dic.get(chs[0]);
		if (root == null || chs.length == 1) {
			return root;
		}
		return get(chs, root, 1, useFull);
	}

	/**
	 * Looks up the node reached by the given keyword.
	 * 
	 * @param matchWords
	 *            keyword to match; when {@code Util.nullValue} reports it as
	 *            empty the result is {@code null}
	 * @param useFull
	 *            whether a full match is required
	 * @return the node found, or {@code null}
	 */
	public TireNode getTire(String matchWords, boolean useFull) {
		if (!Util.nullValue(matchWords)) {
			char[] letters = matchWords.toCharArray();
			return this.get(letters, useFull);
		}
		return null;
	}

	/**
	 * Looks up the node reached by the given keyword without requiring a
	 * full match.
	 * 
	 * @param matchWords
	 *            keyword to match; when {@code Util.nullValue} reports it as
	 *            empty the result is {@code null}
	 * @return the node found, or {@code null}
	 */
	public TireNode getTire(String matchWords) {
		if (!Util.nullValue(matchWords)) {
			char[] letters = matchWords.toCharArray();
			return this.get(letters, false);
		}
		return null;
	}

	/**
	 * Rebuilds the text that leads to {@code node} by following the
	 * {@code pre} links up to the root and reading the characters in
	 * root-to-node order.
	 * 
	 * @param node
	 *            node whose path is wanted; may be {@code null}
	 * @return the characters from the root down to {@code node}, or the
	 *         empty string when {@code node} is {@code null}
	 */
	public String getTireName(TireNode node) {
		// First pass: count the nodes on the path so the buffer can be sized.
		int depth = 0;
		for (TireNode cur = node; cur != null; cur = cur.pre) {
			depth++;
		}

		// Second pass: fill the buffer back to front, which yields the
		// characters in root-to-node order without a reversal step.
		char[] name = new char[depth];
		for (TireNode cur = node; cur != null; cur = cur.pre) {
			name[--depth] = cur.c;
		}
		return new String(name);
	}

	/**
	 * Collects every stored word found at or below {@code node} into
	 * {@code list}. A node counts as the end of a stored word when its
	 * {@code cityCode} list is not {@code null}.
	 * 
	 * @param node
	 *            node to start from; {@code null} leaves {@code list}
	 *            untouched
	 * @param list
	 *            list that receives the words
	 * @param prefix
	 *            text from the root down to {@code node}; pass {@code null}
	 *            to have it derived from the node's {@code pre} links
	 * @return {@code list}
	 */
	private List<String> getAllTireName(TireNode node, List<String> list,
			String prefix) {
		if (node == null) {
			return list;
		}

		// Only the outermost call has to walk back to the root; recursive
		// calls extend the text by one character per level instead.
		String word = prefix != null ? prefix : getTireName(node);
		if (node.cityCode != null) {
			list.add(word);
		}

		if (node.next != null) {
			getAllTireName(node.next, list, word + node.next.c);
		} else if (node.subTire != null) {
			for (TireNode child : node.subTire.values()) {
				getAllTireName(child, list, word + child.c);
			}
		}
		return list;
	}

	/**
	 * Collects every stored word found at or below {@code node} into
	 * {@code list}.
	 * 
	 * @param node
	 *            node to start from
	 * @param list
	 *            list that receives the words
	 * @return {@code list}
	 */
	public List<String> getAllTireName(TireNode node, List<String> list) {
		final String noPrefix = null;
		return getAllTireName(node, list, noPrefix);
	}

	/**
	 * Continues a lookup below {@code node}, matching {@code chs[pos..]}
	 * one character per level.
	 * <p>
	 * At each level the single successor {@code next} is checked first; the
	 * sub-map is only consulted when the node has no {@code next}. The walk
	 * works on the original array through an index, so no copies are made.
	 * 
	 * @param chs
	 *            characters being matched; {@code null} or empty returns
	 *            {@code node} unchanged
	 * @param node
	 *            node whose children are matched against {@code chs[pos]};
	 *            {@code null} yields {@code null}
	 * @param pos
	 *            index of the first character still to be matched
	 * @param useFull
	 *            when {@code true}, every remaining character must be
	 *            matched, otherwise the result is {@code null}; when
	 *            {@code false}, the deepest node reached is returned
	 * @return the node reached after the last matched character, or
	 *         {@code null} as described for {@code useFull}
	 */
	TireNode get(char[] chs, TireNode node, int pos, boolean useFull) {
		if (chs == null || chs.length == 0) {
			return node;
		}
		if (node == null) {
			return null;
		}

		TireNode current = node;
		for (int index = pos; index < chs.length; index++) {
			TireNode child;
			if (current.next != null) {
				child = current.next.c == chs[index] ? current.next : null;
			} else if (current.subTire != null) {
				child = current.subTire.get(chs[index]);
			} else {
				child = null;
			}

			if (child == null) {
				// Characters remain but the tree has no path for them.
				return useFull ? null : current;
			}
			current = child;
		}
		return current;
	}

	/**
	 * Refreshes {@link #size} with the current number of entries in
	 * {@link #dic}, i.e. the number of distinct first characters stored.
	 */
	public void setSize() {
		this.size = dic.size();
	}

	/**
	 * Stores a word and its attributes in the dictionary. Does nothing when
	 * {@code Util.nullValue} reports {@code words} as empty.
	 * 
	 * @param words
	 *            word to store
	 * @param type
	 *            data type of the entry
	 * @param cityCode
	 *            administrative division code of the entry
	 * @param pos
	 *            position of the entry
	 */
	public void put(String words, Object type, Object cityCode, Object pos) {
		if (!Util.nullValue(words)) {
			char[] letters = words.toCharArray();
			putDateToDic(letters, type, cityCode, pos);
		}
	}

	/**
	 * Builds a new chain of nodes, one per character of {@code chs}, and
	 * records the attributes on the last node of the chain.
	 * <p>
	 * Every node that is followed by another node of the chain gets
	 * {@code hasNext = true}, including the head; the last node keeps
	 * {@code hasNext = false}.
	 * <p>
	 * NOTE(review): {@code next} is only retained when {@code chs} has
	 * exactly one character; for longer input the head's {@code next} is
	 * replaced by the second node of the chain. All callers in this class
	 * pass {@code null}.
	 * 
	 * @param chs
	 *            characters of the chain; must contain at least one
	 *            character
	 * @param pre
	 *            parent to set on the head node, may be {@code null}
	 * @param next
	 *            initial successor of the head node, may be {@code null}
	 * @param type
	 *            data type of the entry
	 * @param cityCode
	 *            administrative division code of the entry
	 * @param pos
	 *            position of the entry
	 * @return the head node of the new chain
	 */
	public TireNode createNewTire(char[] chs, TireNode pre, TireNode next,
			Object type, Object cityCode, Object pos) {
		TireNode head = new TireNode(chs[0], pre, next);

		TireNode tail = head;
		for (int i = 1; i < chs.length; i++) {
			TireNode link = new TireNode(chs[i]);
			link.pre = tail;
			tail.next = link;
			tail.hasNext = true;
			tail = link;
		}

		// The tail is a freshly created node, so its attribute lists are
		// still unset and can be created unconditionally.
		tail.type = new ArrayList<>();
		tail.cityCode = new ArrayList<>();
		tail.pos = new ArrayList<>();
		tail.cityCode.add(cityCode);
		tail.type.add(type);
		tail.pos.add(pos);

		return head;
	}

	/**
	 * Demo entry point: loads every document of a local Lucene index into a
	 * tree, adds two sample sentences, and prints the results of a full
	 * match lookup and of a prefix enumeration.
	 * 
	 * @param args
	 *            unused
	 * @throws IOException
	 *             if the index cannot be opened or read
	 */
	public static void main(String[] args) throws IOException {
		TireTree t = new TireTree();

		FSDirectory directory = FSDirectory.open(new File(
				"D:/index/AdminIndex"));
		try {
			IndexReader reader = IndexReader.open(directory);
			try {
				int maxDoc = reader.maxDoc();
				for (int i = 0; i < maxDoc; i++) {
					Document doc = reader.document(i);
					String name = doc.get("name");
					String type = doc.get("type");
					String cityCode = doc.get("totalcity");
					if (!Util.nullValue(name)) {
						// The document number doubles as the position value.
						t.putDateToDic(name.toCharArray(), type, cityCode, i);
					}
				}
			} finally {
				// Release the index files even when reading fails.
				reader.close();
			}
		} finally {
			directory.close();
		}

		t.putDateToDic("今天天气很好适合出去玩".toCharArray(), "123", "123", 1);
		t.putDateToDic("今天天气很不好不适合出去玩".toCharArray(), "123", "123", 1);

		TireNode node = t.get("覃塘镇".toCharArray(), true);
		System.out.println(t.getTireName(node));

		node = t.get("今天天气".toCharArray(), false);
		List<String> list = new ArrayList<>();
		t.getAllTireName(node, list, null);
		System.out.println(list);

		System.out.println();
	}



}

Java项目数据字典 java实现字典树_List_02