Maven依赖:1.6及以上版本的Dom4j在使用XPath(如selectNodes、selectSingleNode)时需要额外引入jaxen包
<dependencies>
<!-- NOTE(review): declared because dom4j 1.6+ needs jaxen on the classpath; presumably for the XPath-based calls (selectNodes / selectSingleNode) - confirm -->
<dependency>
<groupId>jaxen</groupId>
<artifactId>jaxen</artifactId>
<version>1.1.6</version>
</dependency>
<!-- dom4j itself: the XML parsing / tree API used by the Java sample code -->
<dependency>
<groupId>dom4j</groupId>
<artifactId>dom4j</artifactId>
<version>1.6</version>
</dependency>
</dependencies>
字符串格式的xml
<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"><channel><title>Java Tutorials and Examples</title><item><title><![CDATA[Java Tutorials]]></title><link>http://www.javacodegeeks.com/</link></item><item><title><![CDATA[Java Examples]]></title><link>http://examples.javacodegeeks.com/</link></item></channel><channel></channel></rss>
格式化的xml
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Java Tutorials and Examples</title>
<item>
<title>
<![CDATA[Java Tutorials]]>
</title>
<link>http://www.javacodegeeks.com/</link>
</item>
<item>
<title>
<![CDATA[Java Examples]]>
</title>
<link>http://examples.javacodegeeks.com/</link>
</item>
</channel>
<channel></channel>
</rss>
带namespace的xml
<dom:rss xmlns:dom="http://www.domstandard.org/dom/5" version="2.0"><dom:channel><dom:title>Java Tutorials and Examples</dom:title><dom:item><dom:title><![CDATA[Java Tutorials]]></dom:title><dom:link>http://www.javacodegeeks.com/</dom:link></dom:item><dom:item><dom:title><![CDATA[Java Examples]]></dom:title><dom:link>http://examples.javacodegeeks.com/</dom:link></dom:item></dom:channel><dom:channel></dom:channel></dom:rss>
完整代码:
package com.blog.dom4j;
import org.dom4j.*;
import java.util.*;
/**
 * Demonstrates basic create / retrieve / update / delete operations on an XML
 * document with dom4j.
 *
 * <p>{@link #main} parses a small RSS-like document into the shared
 * {@link #doc} / {@link #root} fields, optionally runs one of the demo
 * operations (all disabled by default) and prints the resulting XML without
 * its XML declaration.
 *
 * @author Daniel
 */
public class Dom4jAPI {
    /** Document the demo operations work on; set by {@link #main} or {@link #deleteNameSpace}. */
    static Document doc = null;
    /** Root element of {@link #doc}. */
    static Element root = null;

    /**
     * Parses the sample XML, runs the enabled demo operation (if any) and
     * prints the document.
     *
     * @param args unused
     * @throws IllegalStateException if the sample XML cannot be parsed
     */
    public static void main(String[] args) {
        String xml = "<rss version=\"2.0\"><channel><title>Java Tutorials and Examples</title><item><title><![CDATA[Java Tutorials]]></title><link>http://www.javacodegeeks.com/</link></item><item><title><![CDATA[Java Examples]]></title><link>http://examples.javacodegeeks.com/</link></item></channel><channel><title><title></title></title><title></title></channel></rss>";
        // Fail fast instead of continuing with a null document after a parse error.
        doc = parse(xml);
        root = doc.getRootElement();

        // Enable exactly one of the following to try the corresponding operation:
        // traverse(root);
        // create();
        // update();
        // retrieve();
        // delete();
        // deleteNameSpace();

        String res = doc.asXML();
        // Drop the leading XML declaration (everything up to the first '>').
        System.out.println(res.substring(res.indexOf(">") + 1).trim());
    }

    /**
     * Adds new {@code books} elements to the root in two different ways and
     * appends a copy of the first {@code channel} element.
     */
    public static void create() {
        // Way 1: create the element directly under its parent.
        Element newTag1 = root.addElement("books");
        newTag1.setText("addElement");
        newTag1.addAttribute("bookid", "1");

        // Way 2: create a detached element first, then attach it.
        Element newTag2 = DocumentHelper.createElement("books");
        newTag2.setText("createElement");
        newTag2.addAttribute("bookid", "2");
        root.add(newTag2);

        // Clone the first channel element and append the copy to the root.
        Node firstChannel = root.selectSingleNode("//channel");
        if (firstChannel == null) {
            System.err.println("No <channel> element found, nothing to clone");
            return;
        }
        root.add((Element) firstChannel.clone());
    }

    /** Renames the root element to {@code root}. */
    public static void update() {
        root.setName("root");
    }

    /**
     * Shows several ways of querying the document with XPath: selecting all
     * matches, selecting the first match, counting matches and inspecting the
     * second {@code channel} element.
     */
    public static void retrieve() {
        // A leading "/" makes the path absolute (it starts at the document root);
        // "//" matches nodes at any depth below the starting point.
        // selectNodes returns every node that matches the XPath expression.
        List<?> titles = root.selectNodes("//title");
        for (Object match : titles) {
            Node title = (Node) match;
            System.out.println("name:" + title.getName() + ",path:" + title.getPath());
        }

        // selectSingleNode returns only the first match, or null if nothing matches.
        Node titleNode = root.selectSingleNode("//title");
        if (titleNode != null) {
            System.out.println("titleNode name:" + titleNode.getName() + ",titleNode path:" + titleNode.getPath());
        }

        // Count the title elements anywhere below the second channel.
        String titlesInSecondChannel = "/rss/channel[2]//title";
        int count = doc.numberValueOf("count(" + titlesInSecondChannel + ")").intValue();
        System.out.println(titlesInSecondChannel + "出现的次数为:" + count);

        // Select the second channel itself. Element.getXPathResult(int) is NOT
        // suitable for this: it returns the child content node at that index.
        Node channel2 = root.selectSingleNode("//channel[2]");
        if (channel2 == null) {
            System.err.println("No second <channel> element found");
            return;
        }
        // getUniquePath includes the positional index, e.g. /rss/channel[2].
        System.out.println(channel2.getUniquePath());

        // hasContent is true when the element has at least one content node,
        // i.e. it is not an empty tag.
        boolean flag = channel2.hasContent();
        System.out.println("第二个channel不为单标签:" + flag);
    }

    /** Removes the second {@code channel} element from the root, if present. */
    public static void delete() {
        Node channel2 = root.selectSingleNode("//channel[2]");
        if (channel2 == null) {
            System.err.println("No second <channel> element found, nothing to delete");
            return;
        }
        root.remove(channel2);
    }

    /**
     * Recursively prints the path of every element below the given element.
     *
     * @param root element whose descendants are printed (the element itself is not)
     */
    public static void traverse(Element root) {
        // elements() yields only the direct child elements; recursion covers the rest.
        for (Object child : root.elements()) {
            Element element = (Element) child;
            System.out.println(element.getPath());
            traverse(element);
        }
    }

    /**
     * Parses a namespaced sample document, strips all namespaces from it and
     * makes it the current {@link #doc} / {@link #root}, so that the caller's
     * output reflects the cleaned document.
     *
     * @throws IllegalStateException if the sample XML cannot be parsed
     */
    public static void deleteNameSpace() {
        String namespaceXml = "<dom:rss xmlns:dom=\"http://www.domstandard.org/dom/5\" version=\"2.0\"><dom:channel><dom:title>Java Tutorials and Examples</dom:title><dom:item><dom:title><![CDATA[Java Tutorials]]></dom:title><dom:link>http://www.javacodegeeks.com/</dom:link></dom:item><dom:item><dom:title><![CDATA[Java Examples]]></dom:title><dom:link>http://examples.javacodegeeks.com/</dom:link></dom:item></dom:channel><dom:channel></dom:channel></dom:rss>";
        Document document = parse(namespaceXml);
        // Walk the parsed tree with the visitor that resets every element's namespace.
        document.accept(new Dom4jNameSpaceCleaner());
        // Publish the result; previously the cleaned document was silently discarded.
        doc = document;
        root = document.getRootElement();
    }

    /** Parses XML text, converting the checked parse failure into an unchecked one with its cause. */
    private static Document parse(String xml) {
        try {
            return DocumentHelper.parseText(xml);
        } catch (DocumentException e) {
            throw new IllegalStateException("Failed to parse XML: " + e.getMessage(), e);
        }
    }
}
Dom4jNameSpaceCleaner类:继承Dom4j中的VisitorSupport类,在文档解析完成后通过访问者遍历来删除命名空间
package com.blog.dom4j;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Namespace;
import org.dom4j.VisitorSupport;
import org.dom4j.tree.DefaultElement;
/**
* @Author Daniel
* @Description 删除root与子节点的namespace
**/
public class Dom4jNameSpaceCleaner extends VisitorSupport {
//Remove root namespace
public void visit(Document document) {
((DefaultElement) document.getRootElement())
.setNamespace(Namespace.NO_NAMESPACE);
document.getRootElement().additionalNamespaces().clear();
}
//Remove element namespace
public void visit(Element node) {
if (node instanceof DefaultElement) {
((DefaultElement) node).setNamespace(Namespace.NO_NAMESPACE);
}
}
}