java爬虫.HttpClient.Get请求
原创
©著作权归作者所有:来自51CTO博客作者阿呆小记的原创作品,请联系作者获取转载授权,否则将追究法律责任
Get请求
HttpGet请求响应的一般步骤:
1). 创建HttpClient对象,可以使用HttpClients.createDefault();
2). 创建HttpGet对象:
如果是无参数的GET请求:
直接使用构造方法HttpGet(String url)创建HttpGet对象即可;
如果是带参数GET请求:
先使用URIBuilder(String url)创建对象,
再调用addParameter(String param, String value),或setParameter(String param, String value)来设置请求参数,
并调用build()方法构建一个URI对象。
最后使用构造方法HttpGet(URI uri)来创建HttpGet对象。
3). 创建HttpResponse:
调用HttpClient对象的execute(HttpUriRequest request)发送请求,该方法返回一个HttpResponse。
调用HttpResponse的getAllHeaders()、getHeaders(String name)等方法可获取服务器的响应头;
调用HttpResponse的getEntity()方法可获取HttpEntity对象,该对象包装了服务器的响应内容。
程序可通过该对象获取服务器的响应内容,通过调用getStatusLine().getStatusCode()可以获取响应状态码。
4). 释放连接。
无参
例:
先在Maven资源站 https://mvnrepository.com/ 上查找httpclient的依赖坐标,并将其添加到项目的pom.xml文件中。
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>cn.itcast</groupId>
    <artifactId>itcast-crawler-first</artifactId>
    <!-- "jar" packaging so the Java sources of this project are compiled;
         "pom" packaging does not bind the compiler plugin to the build. -->
    <packaging>jar</packaging>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <!-- The example sources contain non-ASCII identifiers and strings, so pin the encoding. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Pin the language level explicitly instead of relying on the plugin default. -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>
    <dependencies>
        <!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.2</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.25</version>
        </dependency>
    </dependencies>
</project>
再创建一个Java测试类(.java文件):
package cn.csdn.crawlar.test;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import java.io.IOException;
public class HttpGetTest {
public static void main(String[] args) {
//创建HttpClient对象
CloseableHttpClient httpClient = HttpClients.createDefault();
//创建HttpGet对象,设置url访问地址
String uri =
HttpGet httpGet = new HttpGet(uri);
//try/catch/finally : Ctrl+Alt+T
CloseableHttpResponse response = null;
try {
//使用HttpClient发起请求,获取response
response = httpClient.execute(httpGet);
//解析响应
if (response.getStatusLine().getStatusCode() == 200) {
String content = EntityUtils.toString(response.getEntity(), "utf8");
System.out.println(content.length());
}
} catch (IOException e) {
e.printStackTrace();
} finally {
//关闭response
try {
response.close();
} catch (IOException e) {
e.printStackTrace();
}
try {
httpClient.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
运行后,控制台会输出如图所示的内容:
首行是Get请求,接着会发现下面有 HTTP/1.1 200 OK[\r][\n] ,200则表示请求成功。
![在这里插入图片描述 java爬虫.HttpClient.Get请求_java](https://s2.51cto.com/images/blog/202208/12100311_62f5b4df038f699303.png?x-oss-process=image/watermark,size_16,text_QDUxQ1RP5Y2a5a6i,color_FFFFFF,t_30,g_se,x_10,y_10,shadow_20,type_ZmFuZ3poZW5naGVpdGk=/resize,m_fixed,w_1184)
含参
package cn.csdn.crawlar.test;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import java.io.IOException;
/**
 * Example of an HTTP GET request with query parameters using Apache HttpClient.
 *
 * <p>Builds the request URI with {@code URIBuilder}, sends the request and,
 * when the server answers with status 200, prints the length of the response
 * body decoded as UTF-8.
 */
public class HttpGetTest含参 {

    /**
     * Runs the example.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the request URI cannot be built
     */
    public static void main(String[] args) throws Exception {
        // Target address: https://www.icourse163.org/search.htm?search=java
        // The request is built before the client is created, so a URI syntax
        // error cannot leave an open client behind.
        String string = "https://www.icourse163.org/search.htm";
        URIBuilder uriBuilder = new URIBuilder(string);

        // Set the query parameter.
        String param = "search", value = "java";
        uriBuilder.setParameter(param, value);

        // Create the HttpGet from the assembled URI.
        HttpGet httpGet = new HttpGet(uriBuilder.build());
        System.out.println("发起请求的信息"+httpGet);

        // try-with-resources closes the response first and then the client,
        // and never calls close() on a response that was not created
        // (the old finally block threw NullPointerException when execute() failed).
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            // Only read the body for a successful response.
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content.length());
            }
        } catch (IOException e) {
            // Covers failures while executing the request, reading the body or closing.
            e.printStackTrace();
        }
    }
}
结果也会显示出有一些基础的设置,以及访问网站的相关链接
![在这里插入图片描述 java爬虫.HttpClient.Get请求_maven_02](https://s2.51cto.com/images/blog/202208/12100311_62f5b4df93fb419229.png?x-oss-process=image/watermark,size_16,text_QDUxQ1RP5Y2a5a6i,color_FFFFFF,t_30,g_se,x_10,y_10,shadow_20,type_ZmFuZ3poZW5naGVpdGk=/resize,m_fixed,w_1184)