需求:



       读取personList.html网页中所有联系人信息



        按照以下格式输出:



    编号:001    姓名:木丁西    性别:男    年龄:18    地址:XXXXX    电话:XXXXXXXXX



    编号:002    姓名:木丁西    性别:男    年龄:18    地址:XXXXX    电话:XXXXXXXXX


    编号:003    姓名:木丁西    性别:男    年龄:18    地址:XXXXX    电话:XXXXXXXXX  


 html代码如下:


<html>
<head>
<title>2016级毕业同学通讯录</title>
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
</head>
<body>
<center><h1>2016级毕业同学通讯录</h1></center>
<table border="1" align="center" id="contactForm">
<thead>
<tr><th>编号</th><th>姓名</th><th>性别</th><th>年龄</th><th>地址</th><th>电话</th></tr>
</thead>
<tbody>
<tr>
<td>001</td>
<td>张三</td>
<td>男</td>
<td>18</td>
<td>广州市天河区</td>
<td>18071897423</td>
</tr>
<tr>
<td>002</td>
<td>李四</td>
<td>女</td>
<td>20</td>
<td>广州市越秀区</td>
<td>17021897556</td>
</tr>
<tr>
<td>003</td>
<td>郭靖</td>
<td>男</td>
<td>30</td>
<td>广州市番禺区</td>
<td>1342214321</td>
</tr>
</tbody>
</table>
</body>
</html>


Demo如下:



package com.dom4j.xpath;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Text;
import org.dom4j.io.SAXReader;
/**
 * Author: Liu Zhiyong (QQ:1012421396)
 * Version: Version_1
 * Date: 2016-11-07 21:29:42
 *
 * <p>Reads every contact stored in {@code ./src/personList.html} and prints them, one per
 * line, using the layout produced by {@code Person#toString()}: id, name, sex, age, address
 * and phone.
 *
 * <p>The page is parsed with dom4j's {@code SAXReader}, so it has to be well-formed XML.
 */
public class ReadHtml {

    /** Location of the contact page, relative to the working directory. */
    private static final String HTML_PATH = "./src/personList.html";

    /**
     * Parses the contact page, prints its title, then prints every contact row.
     *
     * @param args unused
     * @throws Exception if the file cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        // Parse personList.html into a dom4j Document.
        Document document = new SAXReader().read(new File(HTML_PATH));

        // valueOf() yields "" when nothing matches, so a page without a <title>
        // prints an empty line instead of failing with a NullPointerException.
        System.out.println(document.valueOf("//title/text()"));

        // One <tr> inside <tbody> per contact. The result is iterated as Object and cast,
        // because selectNodes() returns a raw List in dom4j 1.x and List<Node> in dom4j 2.x.
        List<Person> personList = new ArrayList<>();
        for (Object row : document.selectNodes("//tbody/tr")) {
            personList.add(toPerson((Element) row));
        }

        // Other ways to read the cells of a row:
        //   1. absolute XPath per row index i (1-based), evaluated on the document:
        //        document.selectSingleNode("//tr[" + i + "]/td[1]").getText()
        //   2. plain DOM navigation on the row element:
        //        ((Element) row.elements().get(0)).getText()

        // Print the collected contacts.
        for (Person person : personList) {
            System.out.println(person);
        }
    }

    /** Builds a Person from the six td cells of one table row. */
    private static Person toPerson(Element row) {
        Person person = new Person();
        person.setId(cellText(row, 1));
        person.setName(cellText(row, 2));
        person.setSex(cellText(row, 3));
        person.setAge(cellText(row, 4));
        person.setAddress(cellText(row, 5));
        person.setPhone(cellText(row, 6));
        return person;
    }

    /** Returns the text of the column-th (1-based) td of the row, or "" when it has none. */
    private static String cellText(Element row, int column) {
        // The path must stay relative ("td[...]", not "/td[...]") so that it is
        // evaluated against this row rather than against the document root.
        return row.valueOf("td[" + column + "]/text()");
    }
}
————————————————



Person对象


package com.dom4j.xpath;
/**
 * A single contact entry: id, name, sex, age, address and phone.
 *
 * <p>Every field is kept as a plain string and is {@code null} until its setter is called.
 */
public class Person {
    private String id;
    private String name;
    private String sex;
    private String age;
    private String address;
    private String phone;

    /** Returns the contact id. */
    public String getId() {
        return id;
    }

    /** Returns the contact's age. */
    public String getAge() {
        return age;
    }

    /** Sets the contact's age. */
    public void setAge(String age) {
        this.age = age;
    }

    /** Sets the contact id. */
    public void setId(String id) {
        this.id = id;
    }

    /** Returns the contact's name. */
    public String getName() {
        return name;
    }

    /** Sets the contact's name. */
    public void setName(String name) {
        // The assignment target was missing here, which left the class uncompilable.
        this.name = name;
    }

    /** Returns the contact's sex. */
    public String getSex() {
        return sex;
    }

    /** Sets the contact's sex. */
    public void setSex(String sex) {
        this.sex = sex;
    }

    /** Returns the contact's address. */
    public String getAddress() {
        return address;
    }

    /** Sets the contact's address. */
    public void setAddress(String address) {
        this.address = address;
    }

    /** Returns the contact's phone number. */
    public String getPhone() {
        return phone;
    }

    /** Sets the contact's phone number. */
    public void setPhone(String phone) {
        this.phone = phone;
    }

    /** Returns all six fields on one line, each with its label, separated by a tab and a space. */
    @Override
    public String toString() {
        return "编号:" + id + "\t 姓名:" + name + "\t 性别:" + sex
                + "\t 年龄:" + age + "\t 地址:" + address + "\t 电话:" + phone;
    }
}


效果:



xpath提取网页内容_java