# Step 1: fetch the proxy IP list (首先获取IP列表)
#encoding=utf8
import urllib
from lxml import etree
import requests
from bs4 import BeautifulSoup
# Scrape the first page of the xicidaili free-proxy list and write
# "ip port" pairs, one per line, to ip.txt.
# NOTE(review): the original fetched the page twice — once with
# urllib.request and once with requests — and discarded the first
# result; the dead urllib fetch has been removed.
User_Agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0'
header = {'User-Agent': User_Agent}
url = 'http://www.xicidaili.com/nn/1'

# requests performs the HTTP round trip; .text decodes the body.
res = requests.get(url, headers=header).text

# Parse the proxy table: row 0 is the header row; in each data row
# cell 1 holds the IP address and cell 2 holds the port.
ip = etree.HTML(res)
ip = ip.xpath('//*[@id="ip_list"]/*')

entries = []
for row in ip[1:]:
    # Rows without at least three cells carry no usable ip/port pair.
    if len(row) > 2:
        entries.append(row[1].text + " " + row[2].text)

# Join once instead of quadratic string concatenation in the loop;
# keep the original trailing newline after the last entry.
data = "\n".join(entries)
if data:
    data += "\n"

with open("ip.txt", "w") as f:
    f.write(data)
# Step 2: check whether each proxy IP is usable (然后检测IP是否可用)
#encoding=utf8
import urllib
import socket
from urllib import request
# Read "ip port" pairs from ip.txt and build the list of proxy dicts
# that urllib's ProxyHandler expects ({"http": "http://ip:port"}).
# A 3-second default timeout keeps dead proxies from hanging the probe.
socket.setdefaulttimeout(3)

proxys = []
# Use a context manager so the file is always closed (the original
# leaked the handle), and skip blank/short lines so a trailing
# newline in ip.txt cannot raise IndexError on parts[1].
with open("ip.txt") as f:
    for line in f:
        parts = line.split()
        if len(parts) >= 2:
            proxys.append({"http": "http://" + parts[0] + ":" + parts[1]})
# Probe every collected proxy by fetching an IP-echo endpoint through
# it.  A working proxy prints the response body; a failing one prints
# the proxy and the error, then the loop moves on (best-effort check).
url = "http://ip.chinaz.com/getip.aspx"
for proxy in proxys:
    try:
        handler = request.ProxyHandler(proxy)
        opener = request.build_opener(handler)
        body = opener.open(url).read()
        print(body)
    except Exception as err:
        # Deliberately broad: any failure just marks this proxy bad.
        print(proxy)
        print(err)