Java爬虫-URLConnection使用实例
原创
©著作权归作者所有:来自51CTO博客作者陶然同学的原创作品,请联系作者获取转载授权,否则将追究法律责任
GET方法
/**
 * Sends an HTTP GET request to https://www.itcast.cn/ and prints the response body to stdout.
 *
 * <p>Each line of the response is followed by a single {@code '\n'} in the printed text.
 *
 * @throws Exception if the URL is malformed, or if connecting or reading the response fails
 */
@Test
public void testGet() throws Exception {
    // 1. Decide which URL to visit/crawl.
    URL url = new URL("https://www.itcast.cn/");
    // 2. Obtain the connection object.
    HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();
    try {
        // 3. Configure the connection: request method / request headers / timeouts.
        urlConnection.setRequestMethod("GET");
        urlConnection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36");
        urlConnection.setConnectTimeout(30000);
        // Without a read timeout a server that accepts the connection but never
        // answers would block this test indefinitely.
        urlConnection.setReadTimeout(30000);

        // 4. Read the response body.
        // StringBuilder avoids re-copying the whole page on every appended line.
        StringBuilder html = new StringBuilder();
        // try-with-resources closes the reader (and the wrapped stream) even if reading fails.
        // The charset is explicit so decoding does not depend on the JVM's platform default.
        // NOTE(review): assumes the page is served as UTF-8 - confirm against the
        // Content-Type response header if the output looks garbled.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(urlConnection.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line).append('\n');
            }
        }
        System.out.println(html);
    } finally {
        // 5. Release the connection whether or not the request succeeded.
        urlConnection.disconnect();
    }
}
POST方法
/**
 * Sends an HTTP POST request with the body {@code username=xx} to https://www.itcast.cn/
 * and prints the response body to stdout.
 *
 * <p>Each line of the response is followed by a single {@code '\n'} in the printed text.
 *
 * @throws Exception if the URL is malformed, or if connecting, writing the request body,
 *                   or reading the response fails
 */
@Test
public void testPost() throws Exception {
    // 1. Decide which URL to post to.
    URL url = new URL("https://www.itcast.cn/");
    // 2. Obtain the connection object.
    HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();
    try {
        // 3. Configure the connection.
        urlConnection.setDoOutput(true); // required so a request body can be written
        urlConnection.setRequestMethod("POST"); // the request method defaults to GET
        urlConnection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36");
        urlConnection.setConnectTimeout(30000);
        // Without a read timeout a server that accepts the connection but never
        // answers would block this test indefinitely.
        urlConnection.setReadTimeout(30000);

        // Write the request body. Closing the stream (via try-with-resources) flushes it
        // and releases it even when the write fails; the explicit charset keeps the bytes
        // sent independent of the JVM's platform default.
        try (OutputStream out = urlConnection.getOutputStream()) {
            out.write("username=xx".getBytes(java.nio.charset.StandardCharsets.UTF_8));
        }

        // 4. Read the response body.
        // StringBuilder avoids re-copying the whole page on every appended line.
        StringBuilder html = new StringBuilder();
        // NOTE(review): assumes the response is UTF-8 - confirm against the
        // Content-Type response header if the output looks garbled.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(urlConnection.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line).append('\n');
            }
        }
        System.out.println(html);
    } finally {
        // 5. Release the connection whether or not the request succeeded.
        urlConnection.disconnect();
    }
}