在 htmlparser 中自定义节点

 

/**
 * Custom htmlparser composite tag for the HTML {@code <b>} element.
 *
 * <p>All parsing behavior is inherited from {@code CompositeTag}; this subclass
 * only supplies the tag-name arrays returned by the three accessor methods below.
 */
public class BoldTag extends CompositeTag {

    /** The single tag name declared by this class: {@code "B"}. */
    private static final String[] TAG_NAMES = {"B"};

    /** Creates a bold tag; no additional state is set up here. */
    public BoldTag() {
    }

    /**
     * Returns the tag names handled by this class.
     *
     * @return the shared array containing only {@code "B"}
     */
    public String[] getIds() {
        return TAG_NAMES;
    }

    /**
     * Returns the tag names reported as enders for this tag.
     *
     * <p>This is the same array as {@link #getIds()}. NOTE(review): presumably this
     * makes a nested opening {@code <b>} terminate the current one; confirm against
     * the htmlparser documentation.
     *
     * @return the shared array containing only {@code "B"}
     */
    public String[] getEnders() {
        return TAG_NAMES;
    }

    /**
     * Returns the end-tag names reported as enders for this tag.
     *
     * @return a newly allocated empty array on every call
     */
    public String[] getEndTagEnders() {
        return new String[] {};
    }
}

 自定义的节点需要先注册到 PrototypicalNodeFactory 中才可以正常使用：

// Usage example: register the custom BoldTag and print every <b> element found in `html`.
Parser parser = Parser.createParser(html, "UTF-8");
// Obtain the parser's node factory as a PrototypicalNodeFactory so custom tags can be registered.
PrototypicalNodeFactory factory = (PrototypicalNodeFactory) parser.getNodeFactory();
// Register the custom tag with the factory before any parsing happens.
factory.registerTag(new BoldTag());
NodeFilter filter = new TagNameFilter("b");
NodeList list = parser.extractAllNodesThatMatch(filter);
for (int i = 0; i < list.size(); i++) {
    Node node = list.elementAt(i);
    TagNode tn = (TagNode) node;
    System.out.println("==tohmtl==" + node.toHtml());
    System.out.println("==class==" + node.getClass());
    // getChildren() yields null for a tag without children (e.g. "<b></b>");
    // guard against it so an empty tag prints an empty line instead of throwing.
    NodeList children = tn.getChildren();
    System.out.println(children == null ? "" : children.asString());
}