之前写过三篇关于Java采集入库的文章:
基于Java数据采集入库(一)
基于Java数据采集入库(二)
基于Java数据采集入库(三)
分别实现了
①抓取页面信息并显示
②简单采集入库存储
③调用本地数据库查询
④远程调用实现操作(未实现)
以上这些功能都是基于本地的,有时候我们需要远程去调用这类数据,这时我们就可以用JAVA提供的RMI机制实现远程调用访问。
当然也可以用WebServices实现(PHP版本,有时间再写个JAVA版本的)
什么是RMI?
RMI 指的是远程方法调用 (Remote Method Invocation)。它是一种机制,能够让某个 Java 虚拟机上的对象调用另一个 Java 虚拟机中对象的方法。凡是要以这种方式被调用的对象,都必须实现远程接口。调用这样的对象时,参数会被编组 (marshalled),从本地虚拟机发送到远程虚拟机,并在远程虚拟机上解组 (unmarshalled)。方法结束时,远程虚拟机会把返回结果编组后发送回调用方的虚拟机。如果方法调用抛出了异常,该异常也会传递给调用方。
简单了解下RMI,看下简单实现吧
1、定义远程接口
首先,我们需要写个远程接口IHello, 该接口继承了标记接口Remote(java.rmi.Remote).
接口IHello里面有个hello的方法,用于客户端连接后 打招呼.
由于IHello继承了Remote接口, 它的每个远程方法都需要声明抛出 RemoteException 远程异常.
1 import java.rmi.Remote;
2 import java.rmi.RemoteException;
3
4
5 public interface IHello extends Remote{
6
7 public String hello(String name) throws RemoteException;
8 }
2、实现接口
接下来,我们实现下 该接口里的方法, 实现接口的方法在服务端.
这里的HelloImpl类 实现了接口IHello里的方法.
注意:这里HelloImpl同样继承了 UnicastRemoteObject, 这样对象在构造时就会被导出(export)为远程对象. 如果不继承它, 就需要手动调用 UnicastRemoteObject.exportObject 导出, 否则服务端把对象绑定到注册表时会报错.
1 import java.rmi.RemoteException;
2 import java.rmi.server.UnicastRemoteObject;
3
4 /**
5 * UnicastRemoteObject 这个必须写,虽然不写代码也不会出错,但在运行服务器的时候会出现莫名错误
6 * @author Balla_兔子
7 *
8 */
9 public class HelloImpl extends UnicastRemoteObject implements IHello {
10
11 protected HelloImpl() throws RemoteException {
12 super();
13 }
14
15 @Override
16 public String hello(String name) {
17 String strHello="你好!"+name+"正在访问服务端";
18 System.out.println(name+"正在访问服务端");
19 return strHello;
20 }
21
22 }
3、编写服务端
服务端,由于RMI实现远程访问的机制是指:客户端通过在RMI注册表上寻找远程接口对象的地址(服务端地址) 达到实现远程访问的目的,
所以,我们需要在服务端创建一个远程对象的注册表,用于绑定和注册 服务端地址 和 远程接口对象,便于后期客户端能够成功找到服务端
1 import java.rmi.Naming;
2 import java.rmi.RemoteException;
3 import java.rmi.registry.LocateRegistry;
4
5
6 public class Server {
7
8 /**
9 * @param args
10 */
11 public static void main(String[] args) {
12 try {
13 IHello hello=new HelloImpl();
14 int port=6666;
15 LocateRegistry.createRegistry(port);
16 String address="rmi://localhost:"+port+"/tuzi";
17 Naming.bind(address, hello);
18 System.out.println(">>>服务端启动成功");
19 System.out.println(">>>请启动客户端进行连接访问..");
20
21 } catch (Exception e) {
22 e.printStackTrace();
23 }
24 }
25
26 }
4、编写客户端
客户端上同样需要定义一个 远程访问的地址 - 即服务端地址,
然后,通过在RMI注册表上寻找该地址; 如果找到 则建立连接.
1 import java.net.MalformedURLException;
2 import java.rmi.Naming;
3 import java.rmi.NotBoundException;
4 import java.rmi.RemoteException;
5 import java.util.Scanner;
6
7
8 public class Client {
9 public static void main(String[] args) {
10
11 int port=6666;
12 String address="rmi://localhost:"+port+"/tuzi";
13 try {
14 IHello hello=(IHello) Naming.lookup(address);
15 System.out.println("<<<客户端访问成功!");
16 //客户端 Client 调用 远程接口里的 sayHello 方法 并打印出来
17 System.out.println(hello.hello("Rabbit"));
18 Scanner scanner=new Scanner(System.in);
19 String input=scanner.next();
20 } catch (MalformedURLException e) {
21 // TODO Auto-generated catch block
22 e.printStackTrace();
23 } catch (RemoteException e) {
24 // TODO Auto-generated catch block
25 e.printStackTrace();
26 } catch (NotBoundException e) {
27 // TODO Auto-generated catch block
28 e.printStackTrace();
29 }
30
31 }
32 }
运行效果图:
华丽的分割线
接下来就来看看我们的程序吧,今天换种口味来采集下《2013-2014赛季常规赛排名》
这是数据网址:http://nbadata.sports.qq.com/teams_stat.aspx
先上效果图:
好了,剩下的上代码吧,具体看代码注释:
IdoAction.java (功能调用接口代码)
1 package com.lcw.rmi.collection;
2
3 import java.rmi.Remote;
4 import java.rmi.RemoteException;
5 import java.util.List;
6
7 public interface IdoAction extends Remote{
8
9
10 public void initData() throws RemoteException;
11
12 public void getAllDatas() throws RemoteException;
13
14 public List<String> getAllTeams() throws RemoteException;
15
16 public List<String> getTeamInfo(String team) throws RemoteException;
17
18 public List<String> getAllInfo() throws RemoteException;
19
20 }
IdoAction.java
doActionImpl.java (接口实现类)
1 package com.lcw.rmi.collection;
2
3 import java.rmi.RemoteException;
4 import java.rmi.server.UnicastRemoteObject;
5 import java.sql.ResultSet;
6 import java.sql.SQLException;
7 import java.util.ArrayList;
8 import java.util.List;
9
10 public class doActionImpl extends UnicastRemoteObject implements IdoAction {
11
12 /**
13 *
14 */
15 private static final long serialVersionUID = 1L;
16 private Mysql mysql;
17 private ResultSet resultSet;
18
19 public doActionImpl() throws RemoteException {
20 mysql = new Mysql();
21 }
22
23 @Override
24 public void getAllDatas() throws RemoteException {
25 // 调用采集类,获取所有数据
26 CollectData data = new CollectData();
27 data.getAllDatas();
28 System.out.println("数据采集成功!");
29 }
30
31 @Override
32 public List<String> getAllInfo() throws RemoteException {
33 // 查询所有数据
34 String sql = "select * from data";
35 resultSet = mysql.querySQL(sql);
36 List<String> list=new ArrayList<String>();
37 System.out.println("当前执行命令5,正在获取NBA(2013-2014)赛季常规赛队伍所有信息..");
38 System.out.println("获取成功,已在客户端展示..");
39 try {
40 while(resultSet.next()) {
41 for (int i = 2; i < 17; i++) {
42 //System.out.println("++++++++++++++");调试
43 list.add(resultSet.getString(i));
44 }
45 System.out.println();
46 }
47 } catch (SQLException e) {
48 e.printStackTrace();
49 }
50 return list;
51 }
52
53 @Override
54 public List<String> getAllTeams() throws RemoteException {
55 // 查询所有队伍名称
56 String sql = "select team from data";
57 resultSet = mysql.querySQL(sql);
58 List<String> list = new ArrayList<String>();
59 System.out.println("当前执行命令3,正在获取NBA(2013-2014)赛季常规赛队伍..");
60 System.out.println("获取成功,已在客户端展示..");
61 try {
62 while (resultSet.next()) {
63 list.add(resultSet.getString("team"));
64 }
65 } catch (SQLException e) {
66 System.out.println("数据库暂无信息,请执行自动化采集命令");
67 e.printStackTrace();
68 }
69 return list;
70
71 }
72
73 @Override
74 public List<String> getTeamInfo(String team) throws RemoteException {
75 // 根据队伍查询队伍信息
76 ResultSet resultSet = mysql.querySQL("select * from data where team='"
77 + team + "'");
78 List<String> list=new ArrayList<String>();
79 System.out.println("当前执行命令4,正在获取用户所查询队伍信息..");
80 System.out.println("获取成功,已在客户端展示..");
81 try {
82 if (resultSet.next()) {
83 for (int i = 2; i < 17; i++) {
84 list.add(resultSet.getString(i));
85 }
86 }
87 System.out.println();
88 } catch (SQLException e) {
89 System.out.println("数据库暂无信息,请执行自动化采集命令");
90 e.printStackTrace();
91 }
92 return list;
93 }
94
95 @Override
96 public void initData() throws RemoteException {
97 // 初始化数据库
98 String sql = "delete from data";
99 try {
100 mysql.updateSQL(sql);
101 System.out.println("数据库初始化成功!");
102 } catch (Exception e) {
103 System.out.println("数据库初始化失败!");
104 }
105
106 }
107
108 }
doActionImpl.java
CollectData.java (采集主类)
1 package com.lcw.rmi.collection;
2
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.io.InputStreamReader;
7 import java.net.MalformedURLException;
8 import java.net.URL;
9 import java.util.ArrayList;
10 import java.util.Arrays;
11 import java.util.List;
12
13 public class CollectData {
14
15 /**
16 * 采集类,获取所有数据
17 */
18 public void getAllDatas() {
19 String address = "http://nbadata.sports.qq.com/teams_stat.aspx";// 要采集数据的url
20 try {
21 URL url = new URL(address);
22 try {
23 InputStream inputStream = url.openStream();// 打开url,返回字节流
24 InputStreamReader inputStreamReader = new InputStreamReader(
25 inputStream, "gbk");// 将字节流转换为字符流,编码utf-8
26 BufferedReader reader = new BufferedReader(inputStreamReader);// 提高效率,缓存
27 String rankRegEx = ">\\d{1,2}</td>";// 排名正则
28 String teamRegEx = ">[^<>]*</a>";// 队名正则
29 String dataRegEx = ">\\d{1,3}(\\.)\\d{0,2}</td>";// 正常数据正则
30 String percentRegEX = ">\\d{1,2}(\\.)*(\\d)*%</span></td>";// 百分比数据
31 GetRegExData regExData = new GetRegExData();
32 String temp = "";// 存放临时读取数据
33 int flag = 0;
34 String tempRank = "";// 存放匹配到的返回数据
35 String tempTeam = "";// 存放匹配到的返回数据
36 String tempData = "";
37 String tempPercent = "";
38 List<String> list = new ArrayList<String>();
39 Mysql mysql = new Mysql();
40 while ((temp = reader.readLine()) != null) {
41 // 匹配排名
42 if ((tempRank = regExData.getData(rankRegEx, temp)) != "") {
43 tempRank = tempRank.substring(1, tempRank
44 .indexOf("</td>"));
45 // System.out.println("排名:" + tempRank);
46 list.add(tempRank);
47 flag++;
48 }
49 // 匹配球队
50 // 由于该正则会匹配到其他地方的数据,需给它一个标识符,让它从"找到排名位置"才开始匹配
51 if ((tempTeam = regExData.getData(teamRegEx, temp)) != ""
52 && flag == 1) {
53 tempTeam = tempTeam.substring(1, tempTeam
54 .indexOf("</a>"));
55 // System.out.println("球队名称:" + tempTeam);
56 list.add(tempTeam);
57 flag = 0;
58 }
59 // 匹配正常数据
60 if ((tempData = regExData.getData(dataRegEx, temp)) != "") {
61 tempData = tempData.substring(1, tempData
62 .indexOf("</td>"));
63 // System.out.println(tempData);
64 list.add(tempData);
65
66 }
67 // 匹配百分比数据
68 if ((tempPercent = regExData.getData(percentRegEX, temp)) != "") {
69 tempPercent = tempPercent.substring(1, tempPercent
70 .indexOf("</span></td>"));
71 // System.out.println(tempPercent);
72 list.add(tempPercent);
73 }
74
75 }
76 reader.close();
77 Object[] arr = list.toArray();// 将集合转换为数组
78 int a = -15;
79 int b = 0;
80 String sql = "insert into data(rank,team,chushou1,mingzhong1,chushou2,mingzhong2,chushou3,mingzhong3,qianchang,houchang,zong,zhugong,shiwu,fangui,defen) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";
81 for (int i = 0; i < 30; i++) {
82 a += 15;
83 b += 15;
84 if (b <= 450) {
85 Object[] arr1 = Arrays.copyOfRange(arr, a, b);
86 mysql.insertNewData(sql, arr1);
87 System.out.println("正在采集数据..当前采集数据:" + (i + 1) + "条");
88 }
89 }
90
91 } catch (IOException e) {
92 e.printStackTrace();
93 }
94 } catch (MalformedURLException e) {
95 e.printStackTrace();
96 }
97 }
98
99 }
CollectData.java
GetRegExData.java (正则过滤功能类)
1 package com.lcw.rmi.collection;
2
3 import java.util.regex.Matcher;
4 import java.util.regex.Pattern;
5
/**
 * Small regular-expression helper used by the collector.
 */
public class GetRegExData {

    /**
     * Finds the first occurrence of {@code regex} in {@code content}.
     *
     * @param regex regular expression to search for; must not be {@code null}
     * @param content text to search; {@code null} is treated as "no match"
     * @return the first matching substring, or the empty string when nothing
     *         matches
     */
    public String getData(String regex, String content) {
        if (content == null) {
            return "";
        }
        Matcher matcher = Pattern.compile(regex).matcher(content);
        return matcher.find() ? matcher.group() : "";
    }
}
GetRegExData.java
Mysql.java (数据库操作类)
1 package com.lcw.rmi.collection;
2
3 import java.sql.Connection;
4 import java.sql.DriverManager;
5 import java.sql.PreparedStatement;
6 import java.sql.ResultSet;
7 import java.sql.SQLException;
8
/**
 * Minimal JDBC helper for the {@code nba} MySQL database.
 *
 * <p>Every call opens its own connection and keeps all JDBC state in local
 * variables, so one instance can be used from several threads.
 *
 * <p>NOTE(review): the connection settings (including the root account with
 * an empty password) are hard-coded here; confirm whether they should be
 * externalized.
 */
public class Mysql {

    /** Number of placeholders bound by {@link #insertNewData(String, Object[])}. */
    private static final int INSERT_PARAMETER_COUNT = 15;

    private final String driver = "com.mysql.jdbc.Driver";
    private final String url = "jdbc:mysql://localhost:3306/nba";
    private final String user = "root";
    private final String password = "";

    /**
     * Executes an insert statement with {@value #INSERT_PARAMETER_COUNT}
     * placeholders. SQL and driver errors are printed and otherwise ignored.
     *
     * @param insertSql insert statement containing the placeholders
     * @param arr values for the placeholders, bound in order via
     *            {@code toString()}; {@code null} elements are bound as SQL
     *            NULL; elements beyond the first
     *            {@value #INSERT_PARAMETER_COUNT} are ignored
     * @throws IllegalArgumentException if {@code arr} is {@code null} or holds
     *             fewer than {@value #INSERT_PARAMETER_COUNT} elements
     */
    public void insertNewData(String insertSql, Object[] arr) {
        if (arr == null || arr.length < INSERT_PARAMETER_COUNT) {
            throw new IllegalArgumentException("expected at least "
                    + INSERT_PARAMETER_COUNT + " values but got "
                    + (arr == null ? "null" : String.valueOf(arr.length)));
        }
        Connection conn = null;
        PreparedStatement stmt = null;
        try {
            conn = openConnection();
            stmt = conn.prepareStatement(insertSql);
            for (int i = 0; i < INSERT_PARAMETER_COUNT; i++) {
                if (arr[i] == null) {
                    stmt.setNull(i + 1, java.sql.Types.VARCHAR);
                } else {
                    stmt.setString(i + 1, arr[i].toString());
                }
            }
            stmt.executeUpdate();
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(stmt);
            closeQuietly(conn);
        }
    }

    /**
     * Executes a statement that changes the database.
     *
     * @param updateSql complete SQL statement to execute
     * @throws IllegalStateException if the driver cannot be loaded, the
     *             connection cannot be opened or the statement fails
     */
    public void updateSQL(String updateSql) {
        Connection conn = null;
        PreparedStatement stmt = null;
        try {
            conn = openConnection();
            stmt = conn.prepareStatement(updateSql);
            // Use the no-argument form: execute(String) is not meant to be
            // called on a PreparedStatement.
            stmt.execute();
        } catch (SQLException e) {
            // Keep the stack trace visible in case the caller only reports a
            // generic failure message.
            e.printStackTrace();
            throw new IllegalStateException("update failed: " + updateSql, e);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
            throw new IllegalStateException("JDBC driver not found: " + driver, e);
        } finally {
            closeQuietly(stmt);
            closeQuietly(conn);
        }
    }

    /**
     * Executes a query.
     *
     * <p>The returned result set stays attached to a connection opened for
     * this call. The caller is responsible for releasing it when done, e.g.
     * with {@code rs.getStatement().getConnection().close()}.
     *
     * @param searchSql complete SQL query to execute
     * @return the open result set, or {@code null} when the driver cannot be
     *         loaded, the connection cannot be opened or the query fails
     */
    public ResultSet querySQL(String searchSql) {
        Connection conn = null;
        PreparedStatement stmt = null;
        try {
            conn = openConnection();
            stmt = conn.prepareStatement(searchSql);
            return stmt.executeQuery();
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        // Only reached on failure: nothing was handed out, so clean up here.
        closeQuietly(stmt);
        closeQuietly(conn);
        return null;
    }

    /** Loads the driver class and opens a new connection. */
    private Connection openConnection() throws ClassNotFoundException, SQLException {
        Class.forName(driver);
        return DriverManager.getConnection(url, user, password);
    }

    /** Closes a statement, printing (not propagating) any error. */
    private static void closeQuietly(PreparedStatement stmt) {
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /** Closes a connection, printing (not propagating) any error. */
    private static void closeQuietly(Connection conn) {
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}
Mysql.java
Server.java (服务端类)
1 package com.lcw.rmi.collection;
2
3 import java.net.MalformedURLException;
4 import java.rmi.AlreadyBoundException;
5 import java.rmi.Naming;
6 import java.rmi.RemoteException;
7 import java.rmi.registry.LocateRegistry;
8
9 public class Server {
10
11 /**
12 * @param args
13 */
14 public static void main(String[] args) {
15 try {
16 int port = 9797;
17 String address = "rmi://localhost:"+port+"/nba";
18 IdoAction action = new doActionImpl();
19 LocateRegistry.createRegistry(port);
20 try {
21 Naming.bind(address, action);
22 System.out.println(">>>正在启动服务端..");
23 System.out.println(">>>服务端启动成功!");
24 System.out.println(">>>等待客户端连接...");
25 System.out.println(">>>客户端Balla_兔子已连接。");
26 } catch (MalformedURLException e) {
27 e.printStackTrace();
28 } catch (AlreadyBoundException e) {
29 e.printStackTrace();
30 }
31 } catch (RemoteException e) {
32 e.printStackTrace();
33 }
34 }
35
36 }
Server.java
Client.java (客户端类)
1 package com.lcw.rmi.collection;
2
3 import java.net.MalformedURLException;
4 import java.rmi.Naming;
5 import java.rmi.NotBoundException;
6 import java.rmi.RemoteException;
7 import java.util.List;
8 import java.util.Scanner;
9
10 public class Client {
11
12 public static void main(String[] args) {
13 int port = 9797;
14 String address = "rmi://localhost:" + port + "/nba";
15
16 try {
17 IdoAction action = (IdoAction) Naming.lookup(address);
18 System.out.println("正在启动客户端..");
19 System.out.println("客户端启动完毕,正在连接服务端..");
20 System.out.println("连接成功...");
21 System.out.println("---------------------------");
22
23 while (true) {
24 System.out.println("①初始化数据库-请按 (1)");
25 System.out.println();
26 System.out.println("②自动化采集NBA(2013-2014)赛季常规赛排名数据-请按(2)");
27 System.out.println();
28 System.out.println("③查询NBA(2013-2014)赛季常规赛排名所有队伍-请按(3)");
29 System.out.println();
30 System.out.println("④查询具体球队(2013-2014)赛季常规赛排名-请按(4)");
31 System.out.println();
32 System.out.println("⑤查询具体详情-请按(5)");
33 System.out.println();
34
35 Scanner scanner = new Scanner(System.in);
36 String input = scanner.next();
37
38 if (input.equals("1")) {
39 System.out
40 .println("---------------------------------------------------------");
41 System.out.println("服务端数据已初始化,请按2进行数据自动化采集..");
42 action.initData();
43 System.out
44 .println("---------------------------------------------------------");
45 }
46 if (input.equals("2")) {
47 System.out
48 .println("---------------------------------------------------------");
49 System.out.println("数据自动化采集中,请稍后..");
50 int i=0;
51 while(i<10000){//延迟操作,给数据采集缓冲时间
52 i++;
53 }
54 System.out.println("数据采集完毕..按3,4,5进行相关操作");
55 action.getAllDatas();
56 System.out
57 .println("---------------------------------------------------------");
58 }
59 if (input.equals("3")) {
60 System.out
61 .println("---------------------------------------------------------");
62 System.out.println("正在获取NBA(2013-2014)赛季常规赛队伍,请稍后..");
63 System.out.println();
64 List<String> list = action.getAllTeams();
65 for (int i = 0; i < list.size(); i++) {
66 if (i % 5 == 0 && i != 0) {
67 System.out.println();
68 }
69 System.out.print(list.get(i) + "\t");
70 }
71 System.out.println();
72
73 System.out
74 .println("---------------------------------------------------------");
75 }
76 if (input.equals("4")) {
77 System.out
78 .println("---------------------------------------------------------");
79 System.out.println("请输入你要查询的队伍名称(如:76人)");
80 String team = scanner.next();
81 System.out
82 .print("排名\t球队\t出手\t命中率\t出手\t命中率\t出手\t命中率\t前场\t后场\t总\t助攻\t失误\t犯规\t得分");
83 System.out.println();
84 List<String> list=action.getTeamInfo(team);
85 for (int i = 0; i < 15; i++) {
86 System.out.print(list.get(i)+"\t");
87 }
88 System.out.println();
89 System.out
90 .println("---------------------------------------------------------");
91 }
92 if (input.equals("5")) {
93 System.out
94 .println("---------------------------------------------------------");
95 System.out.println("数据获取中,请稍后...");
96 System.out.println();
97 System.out
98 .print("排名\t球队\t出手\t命中率\t出手\t命中率\t出手\t命中率\t前场\t后场\t总\t助攻\t失误\t犯规\t得分");
99 System.out.println();
100 List<String> list=action.getAllInfo();
101 for(int i=0;i<450;i++){
102 if(i%15==0&&i!=0){
103 System.out.println();
104 }
105 System.out.print(list.get(i)+"\t");
106 }
107 System.out.println();
108 System.out
109 .println("---------------------------------------------------------");
110 }
111 }
112 } catch (MalformedURLException e) {
113 e.printStackTrace();
114 } catch (RemoteException e) {
115 e.printStackTrace();
116 } catch (NotBoundException e) {
117 e.printStackTrace();
118 }
119 }
120 }
Client.java
好了,关于JAVA采集数据文章就到此为止了~ 撤··