Preface
This article is mainly intended for learning and discussing Java concurrent programming, and for sharpening the following Java multithreading skills:
- Thread pools
- Sharing resources between threads
- Concurrent read/write problems
It uses IP proxies obtained from a proxy provider to add a steady 20k or so page views per day (across all blog posts combined, not a single post).
This approach is for learning and research only. Never actually use it!!! You do so at your own risk.
Implementation approach
First, crawl the blog list pages to collect the URLs of all your posts.
Start n threads; each thread begins its task 10 seconds after the previous one (a small scheduling sketch follows these steps).
Fetch a batch of 200 proxy IPs; each thread then visits one blog post through a proxy at a random interval of 60-120 seconds.
As long as you keep the machine running and the proxy provider stays up, the threads will keep refreshing until the end of time!
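Here is a minimal sketch of the staggered start described above, using a ScheduledExecutorService to delay each worker's first run. The full code below achieves the same thing with a Thread.sleep at the top of each worker's run method; the class name and worker output here are illustrative only.

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class StaggeredStartSketch {
    public static void main(String[] args) {
        int threadSize = 4; // illustrative; the real code below defaults to 300
        ScheduledExecutorService pool = Executors.newScheduledThreadPool(threadSize);
        for (int i = 1; i <= threadSize; i++) {
            final int workerId = i;
            // delay worker i's first run by i * 10 seconds, mirroring the
            // i * 10000 ms initial sleep passed to RefreshBlogThreadNew below
            pool.schedule(new Runnable() {
                @Override
                public void run() {
                    System.out.println("worker-" + workerId + " started");
                }
            }, workerId * 10L, TimeUnit.SECONDS);
        }
        pool.shutdown(); // already-queued one-shot tasks still run after shutdown()
    }
}

In the actual implementation below the delay is handled inside RefreshBlogThreadNew itself, which keeps each worker self-contained.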
This is only a demo I put together to study multithreading and to practice solving concurrency problems. Please do not use it for real; the code is shared purely for learning and discussion, and better ideas are very welcome!
In the end, keeping the quality of your posts high is what really matters. If you like this post, please give it a thumbs up~
Usage guide
- Initialize: run StepOne (source code below) to collect the URLs of all your blog posts.
- Buy an IP proxy service; I used Daxiang Proxy (大象代理).
- Fill in your order ID where the code comments indicate.
- Set the number of threads to start.
- On a decent machine you can confidently go up to 1000 threads, which refreshes roughly 20k-40k views a day. Very high values are not recommended; as a rule of thumb, a thread count of 50-2000 maps to about 5k-50k views.
- The default is 300 threads, which gave about 20k views a day in my tests.
- Start it up.
Full code (tested and working)
package com.qyk; // assuming all three classes live in the same package, as StepOne's package declaration suggests

import java.util.HashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;
public class ExcuteLocal {
/**
 * Usage guide:
 * 1. Run StepOne's main method first to collect all of your blog URLs
 *    (manually copying and pasting all of your blog URLs works too).
 * 2. Paste the generated hashMap.put lines into getLocalBlogUrl in this class,
 *    at the marked location!
 * 3. Visit http://www.daxiangdaili.com/ and buy a plan (the 9-yuan one-day plan is enough),
 *    then put the order ID into proxyOrderId below.
 * 4. Run the main method below; expect roughly 10k-20k views per day.
 * @param args
 */
public static void main(String[] args) {
// ==========================================================
// Visit http://www.daxiangdaili.com/ and buy an order ID, then you can run this.
// A one-day plan is enough; put the order number here!!
// ==========================================================
String proxyOrderId = "55811xxxxx87931";
// ==========================================================
// Set this however you like.
// Thread count: on a decent machine you can go up to 1000, which refreshes roughly
// 20k-40k a day. Don't set it too high; size (50-2000) maps to roughly 5k-50k views.
// ==========================================================
int threadSize = 300;
// Initialize the blog URLs to refresh
HashMap<Integer, String> localBlogUrl = getLocalBlogUrl();
// Initialize the per-blog counter map: key = blog index, value = number of times that blog has been visited
HashMap<Integer, AtomicInteger> localBlogUrlCount = new HashMap<Integer, AtomicInteger>();
for (int i = 0; i < localBlogUrl.size(); i++) {
localBlogUrlCount.put(i, new AtomicInteger());
}
// Global counter. AtomicInteger's incrementAndGet() is atomic, so concurrent
// increments from many threads are never lost and no explicit locking is needed.
AtomicInteger count = new AtomicInteger();
ExecutorService executorService = Executors.newFixedThreadPool(threadSize);
for (int i = 1; i <= threadSize; i++) {
// submit the Runnable directly: wrapping it in a Thread and handing that to the pool
// would just execute it as an ordinary Runnable, and the thread name would be ignored
executorService.execute(new RefreshBlogThreadNew(i * 10000, localBlogUrl, localBlogUrlCount, count, proxyOrderId));
}
}
// Builds the map of blog URLs to refresh: key = index, value = blog URL.
// Paste the hashMap.put lines generated by StepOne between the markers below
// (or load them from a local BlogUrl.txt; see the sketch after this class).
public static HashMap<Integer, String> getLocalBlogUrl() {
HashMap<Integer, String> hashMap = new HashMap<Integer, String>();
int id = 0; // start at 0 so the keys line up with the counter map in main and with randomBlogUrl()
// ---------------------------- paste the generated lines between these markers
hashMap.put(id++, "");
hashMap.put(id++, "");
// ---------------------------- paste the generated lines between these markers
return hashMap;
}
}
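The comment on getLocalBlogUrl mentions a local BlogUrl.txt, but the method above hard-codes the URLs. If you would rather keep the URLs in a file, here is a minimal sketch, assuming one URL per line in a UTF-8 file called BlogUrl.txt in the working directory (the class name BlogUrlFileLoader is my own choice, not from the original post):

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.List;

public class BlogUrlFileLoader {
    // Reads one blog URL per line and numbers them 0..n-1,
    // the same keys getLocalBlogUrl produces above.
    public static HashMap<Integer, String> load(String path) throws IOException {
        HashMap<Integer, String> map = new HashMap<Integer, String>();
        List<String> lines = Files.readAllLines(Paths.get(path), StandardCharsets.UTF_8);
        int id = 0;
        for (String line : lines) {
            String url = line.trim();
            if (!url.isEmpty()) {
                map.put(id++, url);
            }
        }
        return map;
    }
}

You could then call BlogUrlFileLoader.load("BlogUrl.txt") in main instead of getLocalBlogUrl().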
package com.qyk; // same package as ExcuteLocal and StepOne

import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
public class RefreshBlogThreadNew implements Runnable {
// number of blog URLs in the map
private int blogUrlSize = 0;
// the blog URL map, shared read-only by all worker threads
private static HashMap<Integer, String> LocalBlogUrl = null;
// per-blog visit counters
private HashMap<Integer, AtomicInteger> LocalBlogUrlCount = null;
// total visit counter
private AtomicInteger count = null;
// initial delay before this worker starts, in milliseconds (despite the name)
private int sleepSec = 0;
private String proxyOrderId = "";
public RefreshBlogThreadNew(int sleepSec, HashMap<Integer, String> localBlogUrl, HashMap<Integer, AtomicInteger> localBlogUrlCount, AtomicInteger count, String proxyOrderId) {
this.proxyOrderId = proxyOrderId;
this.LocalBlogUrl = localBlogUrl;
this.sleepSec = sleepSec;
this.blogUrlSize = LocalBlogUrl.size();
this.LocalBlogUrlCount = localBlogUrlCount;
this.count = count;
}
@Override
public void run() {
String threadName = Thread.currentThread().getName();
System.out.println(threadName + " ---- initial delay " + sleepSec + " ms");
try {
Thread.sleep(sleepSec);
System.out.println(threadName + " requesting proxies");
} catch (InterruptedException e) {
e.printStackTrace();
}
while (true) {
// e.g. if your order number is 123456789 and you want to pull 200 proxies per request, the URL looks like this:
String url = "http://tvp.daxiangdaili.com/ip/?tid="+proxyOrderId+"&num=200&delay=5";
List<MyIp> ipList = getIp(url);
for (MyIp myIp : ipList) {
System.setProperty("http.maxRedirects", "50");
System.getProperties().setProperty("proxySet", "true");
System.getProperties().setProperty("http.proxyHost", myIp.getAddress());
System.getProperties().setProperty("http.proxyPort", myIp.getPort());
// one visit through each proxy; once the list is exhausted, the outer loop fetches a fresh batch
try {
int id = 0;
String urlStr = null;
// keep picking until we land on a non-empty blog URL
while (StringUtils.isBlank(urlStr)) {
id = randomBlogUrl();
urlStr = LocalBlogUrl.get(id);
}
Document doc = Jsoup.connect(urlStr)
.userAgent("Mozilla")
.cookie("auth", "token")
.timeout(3000)
.get();
if (doc != null) {
count.incrementAndGet();
LocalBlogUrlCount.get(id).incrementAndGet();
System.out.print("ID: " + id + "\tAddress: " + urlStr + "\tsuccessful refreshes: " + count + "\tProxy: " + myIp.toString() + "\t");
}
} catch (IOException e) {
// the request through this proxy failed; skip it and move on
}
try {
sleepThread(randomClick());
} catch (InterruptedException e) {
e.printStackTrace();
}
show();
}
}
}
// Picks a random blog index so visits are spread across posts; if every post had
// exactly the same count it would obviously look faked.
public int randomBlogUrl() {
int id = new Random().nextInt(blogUrlSize);
return id;
}
// Random delay between visits, to look more like real traffic and avoid detection by the blog platform;
// returns a random number of seconds between 60 and 259 (nextInt(200) yields 0-199).
public int randomClick() {
int time = (new Random().nextInt(200)) + 60;
return time;
}
// Fetches the proxy IPs bought from Daxiang Proxy and loads them into an ArrayList<MyIp>
// (a minimal sketch of the MyIp bean is given after this class).
public List<MyIp> getIp(String url) {
List<MyIp> ipList = null;
while (ipList == null) {
try {
//1. Send a GET request to the proxy API and fetch the proxy list
Document doc = Jsoup.connect(url)
.userAgent("Mozilla")
.cookie("auth", "token")
.timeout(3000)
.get();
System.out.println(doc.body().text());
//2. Extract the returned proxy list as a plain string
String ipStr = doc.body().text().trim();
System.out.println("current proxy list ---------- " + ipStr);
//3. Split the string on whitespace into individual host:port entries
String[] ips = ipStr.split("\\s+");
//4. Wrap each host:port entry in a MyIp bean
ipList = new ArrayList<MyIp>();
for (final String ip : ips) {
MyIp myIp = new MyIp();
String[] temp = ip.split(":");
myIp.setAddress(temp[0].trim());
myIp.setPort(temp[1].trim());
ipList.add(myIp);
}
} catch (IOException e) {
System.out.println("加载文档出错,等待5s后重试");
try {
Thread.sleep(5000);
} catch (InterruptedException e1) {
e1.printStackTrace();
}
}
}
return ipList;
}
// Sleeps the current thread for s seconds between visits. The platform's rule seems to be
// that one IP is only counted once per blog post every 5-15 minutes, hence the long pauses.
public void sleepThread(int s) throws InterruptedException {
long ms = s * 1000;
Thread.sleep(ms);
System.out.println("睡眠: " + s + "s");
}
// Prints the visit statistics
public void show() {
System.out.println("访问量统计:");
for (int i = 0; i < LocalBlogUrlCount.size(); i++) {
System.out.print("博客【" + i + "】:" + LocalBlogUrlCount.get(i) + "次\t");
}
System.out.println();
System.out.println("总计:" + count + "次");
System.out.println();
}
}
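RefreshBlogThreadNew depends on a MyIp bean whose source is not included in the post. Based on how it is used (setAddress/setPort when parsing the host:port strings, getAddress/getPort when setting the proxy properties, and toString for logging), a minimal sketch could look like this:

public class MyIp {
    // proxy host, e.g. "1.2.3.4"
    private String address;
    // proxy port kept as a String because it is passed straight to the
    // http.proxyPort system property
    private String port;

    public String getAddress() { return address; }
    public void setAddress(String address) { this.address = address; }
    public String getPort() { return port; }
    public void setPort(String port) { this.port = port; }

    @Override
    public String toString() { return address + ":" + port; }
}

Keep in mind that http.proxyHost and http.proxyPort are JVM-wide system properties, so all worker threads end up sharing whichever proxy was set most recently; a per-connection java.net.Proxy (or the Connection.proxy(...) method added in newer jsoup releases) would avoid that, but the code above sticks with the original global-property approach.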
package com.qyk;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class StepOne {
//======================start===========================
//1. Replace this with your own blog user id (open "My Blog" and take the trailing segment of its URL, e.g. Mrkaizi)
//2. Copy the block of Java statements printed to the console into ExcuteLocal, at the location marked there
//======================end=============================
static String userId = "Mrkaizi";
static int random_num = 0;
public static void main(String urlstr[]) throws IOException, InterruptedException {
Set<String> urls = new HashSet<String>();
// ---------------------------------------------- walk every list page and collect the article links ----------------------------------------------
final String homeUrl = "" + userId + "/article/list/";// append the page number to this
int totalPage = 0;
InputStream is;
String pageStr;
StringBuilder curUrl = null;
for (int i = 1; i < 100; i++) {
Thread.sleep(1000);
System.out.println("finding page " + i);
curUrl = new StringBuilder(homeUrl);
curUrl.append(i);
System.out.println(curUrl);
is = doGet(curUrl.toString());
pageStr = inputStreamToString(is, "UTF-8");// the full HTML source of one list page
List<String> list = getMatherSubstrs(pageStr, "(?<=href=\")" + userId + "/article/details/[0-9]{8,9}(?=\")");
urls.addAll(list);
if (pageStr.lastIndexOf("空空如也") != -1) {// "空空如也" is the marker text shown on an empty list page
System.out.println("No such page!");
break;
} else {
System.out.println("Success~");
}
totalPage = i;
}
System.out.println("总页数为: " + totalPage);
// --------------------------------------------------- print a ready-to-paste line for every link ---------------------------------------------------
System.out.println("Printing the generated lines");
for (String s:urls) {
System.out.println("hashMap.put(id++, \""+s+"\");");
}
}
public static InputStream doGet(String urlstr) throws IOException {
URL url = new URL(urlstr);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
if (random_num++%2==0) {
conn.setRequestProperty("User-Agent",
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36");
}else {
conn.setRequestProperty("User-Agent",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36");
}
conn.setRequestProperty("Referer",
"");
InputStream inputStream = conn.getInputStream();
return inputStream;
}
public static String inputStreamToString(InputStream is, String charset) throws IOException {
byte[] bytes = new byte[1024];
int byteLength = 0;
StringBuffer sb = new StringBuffer();
while ((byteLength = is.read(bytes)) != -1) {
sb.append(new String(bytes, 0, byteLength, charset));
}
is.close(); // release the connection's stream once the whole page has been read
return sb.toString();
}
// collect every match of the regex in the string
public static List<String> getMatherSubstrs(String str, String regex) {
List<String> list = new ArrayList<String>();
Pattern p = Pattern.compile(regex);
Matcher m = p.matcher(str);
while (m.find()) {
list.add(m.group());
}
return list;
}
}
The project needs the following Maven dependencies:
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.3</version>
</dependency>