Task:

Crawl pages from Dongqiudi (懂球帝).

Outstanding issues:

The pages are not parsed yet,

and the extracted data is not persisted.

1. Add the Maven dependencies

<dependencies>
    <dependency>
        <groupId>org.apache.httpcomponents</groupId>
        <artifactId>httpclient</artifactId>
        <version>4.5.5</version>
    </dependency>

    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.47</version>
    </dependency>
</dependencies>

2. Write the request code

import com.alibaba.fastjson.JSONObject;
import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

import java.util.ArrayList;
import java.util.List;

public class HttpClientService {

    /** HTTP status code of a successful response. */
    private static final int SUCCESS_CODE = 200;

    /**
     * Send a GET request.
     * @param url request URL
     * @param nameValuePairList request parameters
     * @return a JSONObject if the response body is valid JSON, otherwise the raw string;
     *         null if the request did not return 200
     * @throws Exception on request or I/O errors
     */
    public static Object sendGet(String url, List<NameValuePair> nameValuePairList) throws Exception {
        CloseableHttpClient client = null;
        CloseableHttpResponse response = null;
        try {
            // Create the HttpClient
            client = HttpClients.createDefault();
            // Build the URI and attach the query parameters
            URIBuilder uriBuilder = new URIBuilder(url);
            uriBuilder.addParameters(nameValuePairList);
            // Create the HttpGet request
            HttpGet httpGet = new HttpGet(uriBuilder.build());
            // Set the request encoding
            httpGet.setHeader(new BasicHeader("Content-Type", "application/x-www-form-urlencoded; charset=utf-8"));
            // Set the accepted response encoding
            httpGet.setHeader(new BasicHeader("Accept", "text/plain;charset=utf-8"));
            // Execute the request
            response = client.execute(httpGet);
            // Check the response status code
            int statusCode = response.getStatusLine().getStatusCode();
            if (SUCCESS_CODE == statusCode) {
                // Read the response body via EntityUtils
                HttpEntity entity = response.getEntity();
                String result = EntityUtils.toString(entity, "UTF-8");
                // Return a JSONObject if the body parses as JSON, otherwise the raw string
                try {
                    return JSONObject.parseObject(result);
                } catch (Exception e) {
                    return result;
                }
            }
        } finally {
            // Close resources; guard against nulls in case creation failed
            if (response != null) {
                response.close();
            }
            if (client != null) {
                client.close();
            }
        }
        return null;
    }

    /**
     * Send a POST request.
     * @param url request URL
     * @param nameValuePairList request parameters
     * @return a JSONObject if the response body is valid JSON, otherwise the raw string;
     *         null if the request did not return 200
     * @throws Exception on request or I/O errors
     */
    public static Object sendPost(String url, List<NameValuePair> nameValuePairList) throws Exception {
        CloseableHttpClient client = null;
        CloseableHttpResponse response = null;
        try {
            // Create the HttpClient
            client = HttpClients.createDefault();
            // Create the HttpPost request
            HttpPost post = new HttpPost(url);
            // Wrap the parameters in a form entity and set it as the request body
            UrlEncodedFormEntity entity = new UrlEncodedFormEntity(nameValuePairList, "UTF-8");
            post.setEntity(entity);
            // Set the request encoding
            post.setHeader(new BasicHeader("Content-Type", "application/x-www-form-urlencoded; charset=utf-8"));
            // Set the accepted response encoding
            post.setHeader(new BasicHeader("Accept", "text/plain;charset=utf-8"));
            // Execute the request
            response = client.execute(post);
            // Check the response status code
            int statusCode = response.getStatusLine().getStatusCode();
            if (SUCCESS_CODE == statusCode) {
                // Read the response body via EntityUtils
                String result = EntityUtils.toString(response.getEntity(), "UTF-8");
                // Return a JSONObject if the body parses as JSON, otherwise the raw string
                try {
                    return JSONObject.parseObject(result);
                } catch (Exception e) {
                    return result;
                }
            }
        } finally {
            // Close resources; guard against nulls in case creation failed
            if (response != null) {
                response.close();
            }
            if (client != null) {
                client.close();
            }
        }
        return null;
    }

    /**
     * Build the request parameter list (names and values are matched by index).
     * @param params array of parameter names
     * @param values array of parameter values
     * @return the parameter list, or null if the arrays are empty or of different lengths
     */
    public static List<NameValuePair> getParams(Object[] params, Object[] values) {
        // Validate the input arrays
        boolean valid = params.length > 0 && values.length > 0 && params.length == values.length;
        if (valid) {
            List<NameValuePair> nameValuePairList = new ArrayList<NameValuePair>();
            for (int i = 0; i < params.length; i++) {
                nameValuePairList.add(new BasicNameValuePair(params[i].toString(), values[i].toString()));
            }
            return nameValuePairList;
        }
        return null;
    }
}
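
A note on the design: CloseableHttpClient and CloseableHttpResponse both implement Closeable, so on Java 7+ the null-checked finally blocks can be replaced with try-with-resources. Below is a minimal sketch of the GET variant written that way, as an extra method inside HttpClientService; sendGetCompact is a hypothetical name, not part of the code above.

    /**
     * Sketch: sendGet rewritten with try-with-resources.
     * Hypothetical alternative with the same behavior as sendGet above.
     */
    public static Object sendGetCompact(String url, List<NameValuePair> params) throws Exception {
        URIBuilder uriBuilder = new URIBuilder(url);
        uriBuilder.addParameters(params);
        HttpGet httpGet = new HttpGet(uriBuilder.build());
        // Resources are closed automatically, in reverse order, even on exceptions
        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() != SUCCESS_CODE) {
                return null;
            }
            String result = EntityUtils.toString(response.getEntity(), "UTF-8");
            try {
                return JSONObject.parseObject(result);  // valid JSON -> JSONObject
            } catch (Exception e) {
                return result;                          // otherwise the raw string
            }
        }
    }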

              

3. Write a main method to make the calls

import org.apache.http.NameValuePair;

import java.util.List;

/**
 * Test class for sending POST/GET requests.
 */
public class HttpClientMain {

    public static void main(String[] args) throws Exception {
        String url = "http://www.dongqiudi.com/data";
        // Parameter names
        Object[] params = new Object[]{"param1", "param2"};
        // Parameter values
        Object[] values = new Object[]{"value1", "value2"};
        // Build the parameter list
        List<NameValuePair> paramsList = HttpClientService.getParams(params, values);
        // Send the GET request
        Object result = HttpClientService.sendGet(url, paramsList);
        System.out.println("GET response: " + result);
    }
}
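
Note: many real sites reject or throttle requests that carry HttpClient's default User-Agent. If the call above comes back empty or with an error page, a browser-like User-Agent header is usually the first thing to try; inside sendGet this is one extra line (the UA string below is illustrative, not something dongqiudi.com specifically requires):

// Optional: present a browser-like User-Agent; the value here is illustrative
httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36");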

4. Results: this is only the first step of the crawling work.

We still need to parse the pages to extract the data we need,

and then persist that data.
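
As a pointer for those two remaining steps, here is a minimal, hypothetical sketch: it assumes Jsoup (a common Java HTML parser, not used anywhere above) for the parsing, and a plain text file for the persistence. The CSS selector and the output path are placeholders, since the actual structure of the Dongqiudi pages is not shown in this post. Jsoup needs its own Maven dependency:

<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.11.3</version>
</dependency>

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import java.io.PrintWriter;

public class PageParser {

    /**
     * Parse raw HTML (e.g. the String returned by HttpClientService.sendGet
     * when the body is not JSON) and persist the extracted text to a file.
     * ".team-name" and "data.txt" are placeholders to adapt to the real page.
     */
    public static void parseAndSave(String html) throws Exception {
        Document doc = Jsoup.parse(html);
        try (PrintWriter writer = new PrintWriter("data.txt", "UTF-8")) {
            for (Element el : doc.select(".team-name")) {
                writer.println(el.text());
            }
        }
    }
}

With something like this in place, the main method above could end with PageParser.parseAndSave(result.toString()) instead of just printing the result.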