1. Common methods of the requests module
1. requests.request() constructs and sends a request; it is the base method the others are built on
2. requests.get() sends a GET request
3. requests.post() sends a POST request
4. requests.head() sends a HEAD request
The basic usage and common parameters of the methods above are essentially the same.
2. GET requests
I. Basic request
response = requests.get("http://www.baidu.com/")
Commonly used attributes of the returned response:
1. response.status_code  the HTTP status code
2. response.text  the response body as a string, i.e. the content of the page at the URL
3. response.content  the response body as bytes
4. response.cookies.get_dict()  the cookies returned by the server, as a dict
5. response.encoding  the response encoding guessed from the HTTP headers
6. response.apparent_encoding  the encoding inferred from the content itself (a fallback)
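A minimal sketch tying these attributes together; switching to apparent_encoding is the usual fix when response.text comes out garbled:
response = requests.get("http://www.baidu.com/")
print(response.status_code)  # e.g. 200
# fall back to the encoding inferred from the content if the header-declared one is wrong
if response.encoding != response.apparent_encoding:
    response.encoding = response.apparent_encoding
print(response.text[:100])          # first 100 characters of the page
print(response.cookies.get_dict())  # cookies set by the server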
II. Setting request headers
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"}
response = requests.get("http://www.baidu.com/", headers = headers)
三、传递url参数
可以使用params 关键字参数,以一个字符串字典来提供这些参数
# params 接收一个字典或者字符串的查询参数,字典类型自动转换为url编码,不需要urlencode()
payload = {'key1': 'value1', 'key2': ['value2', 'value3']}
response = requests.get("http://www.baidu.com/", headers = headers, params=payload)
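With the payload above, the list value is expanded by repeating the key, so response.url ends up as something like http://www.baidu.com/?key1=value1&key2=value2&key2=value3.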
IV. Requests with cookies
response = requests.get("http://www.baidu.com/", cookies=cookie_dict)
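cookie_dict here is an ordinary dict, typically captured from an earlier response; a minimal sketch (the cookie name shown is only an example):
r1 = requests.get("http://www.baidu.com/")
cookie_dict = r1.cookies.get_dict()  # e.g. {'BAIDUID': '...'}
response = requests.get("http://www.baidu.com/", cookies=cookie_dict)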
3. POST requests
Basic POST usage is much the same as GET.
POST passes its payload via data.
Note:
data: data carried in the request body
params: parameters carried in the URL, generally used for GET requests
A simple example:
res = requests.request(
    method='POST',
    url='http://www.baidu.com',
    params={'k1': 'v1', 'k2': 'v2'},
    data={'use': 'alex', 'pwd': '123', 'x': [11, 2, 3]}
)
print(res.url)  # http://www.baidu.com/?k1=v1&k2=v2
As you can see, the params were appended to the URL as a query string.
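To see this split for yourself, you can echo the request back with a public test service such as httpbin.org (used here purely for illustration):
res = requests.post(
    url='http://httpbin.org/post',
    params={'k1': 'v1'},
    data={'use': 'alex', 'pwd': '123'}
)
print(res.json()['args'])  # {'k1': 'v1'}, taken from the URL
print(res.json()['form'])  # {'use': 'alex', 'pwd': '123'}, taken from the body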
4. Parameter reference
1. params: dict or byte sequence, appended to the URL as query parameters; generally used for GET requests
res = requests.request(
    method='get',
    url='http://www.baidu.com',
    params={'k1': 'v1', 'k2': 'v2'},
)
print(res.url)  # http://www.baidu.com/?k1=v1&k2=v2
2. data: dict, byte sequence, or file-like object, sent as the body of the Request
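The type passed to data decides how the body is encoded; a quick sketch for illustration:
import json
# a dict is form-encoded: the body becomes use=alex&pwd=123
requests.post('http://www.baidu.com', data={'use': 'alex', 'pwd': '123'})
# a string is sent as the raw body; here, a JSON document built by hand
requests.post('http://www.baidu.com', data=json.dumps({'use': 'alex', 'pwd': '123'}))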
3. headers: dict of custom HTTP headers
res = requests.request(
    method='get',
    url='http://www.baidu.com',
    headers={"User-Agent": "chrome"}
)
4. cookies: dict or CookieJar, the cookies sent with the Request
5. timeout: timeout in seconds
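When the limit is exceeded, requests raises requests.exceptions.Timeout, which you would normally catch:
try:
    res = requests.get('http://www.baidu.com', timeout=3)
except requests.exceptions.Timeout:
    print('request timed out')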
6. proxies: dict mapping protocol to proxy server; login credentials can be embedded if the proxy requires authentication
7. allow_redirects: True/False, defaults to True; whether redirects are followed
8. stream: True/False, defaults to False. By default the whole response body is downloaded immediately and held in memory, which can exhaust memory for a large file; set stream=True to defer downloading the body until you actually read it.
9. verify: True/False, defaults to True; whether the server's SSL certificate is verified
10. cert: path to a local SSL client certificate
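A brief sketch of verify and cert; the certificate path is only a placeholder:
requests.get('https://www.baidu.com', verify=False)             # skip certificate verification (urllib3 prints a warning)
requests.get('https://www.baidu.com', cert='/path/client.pem')  # present a local client certificate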
Note:
- iter_content: iterate over the content to download, chunk by chunk
- iter_lines: iterate over the content to download, line by line
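A minimal sketch combining stream=True with iter_content to fetch a large file chunk by chunk instead of holding it all in memory (URL and filename are placeholders):
res = requests.get('http://www.example.com/big.zip', stream=True)
with open('big.zip', 'wb') as f:
    for chunk in res.iter_content(chunk_size=8192):  # read 8 KB at a time
        if chunk:
            f.write(chunk)
And a combined example exercising several of the parameters above: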
r1 = requests.get('http://dig.chouti.com/')
r1_cookies = r1.cookies.get_dict()
r2 = requests.post(
    url="http://dig.chouti.com/login",
    headers={"User-Agent": "chrome"},
    params={'k1': 'v1', 'k2': 'v2'},
    data={'use': 'alex', 'pwd': '123', 'x': [11, 2, 3]},
    cookies=r1_cookies,
    timeout=10,
    proxies={"http": "http://12.34.56.79:9527", "https": "http://12.34.56.79:9527"},
    allow_redirects=False
)
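Because allow_redirects=False, a redirect response is returned as-is: r2.status_code would then be a 3xx code, and the target address can be read from r2.headers['Location'].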
5. Complete examples
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
import requests
import json
import datetime, time
import os,stat
import urllib.parse
import http.cookiejar
class Zabbix:
def __init__(self, url,gr_url,login_url,header, username, password):
self.url = url
self.gr_url = gr_url
self.login_url = login_url
self.header = header
self.username = username
self.password = password
def getToken(self):
        # Log in and return the auth token as a string
data = {"jsonrpc": "2.0",
"method": "user.login",
"params": {
"user": self.username,
"password": self.password
},
"id": 1,
"auth": None
}
token = requests.post(url=self.url, headers=self.header, data=json.dumps(data))
return json.loads(token.content)["result"]
    # Get the IDs of all host groups
def getHostgroup(self):
data = {"jsonrpc": "2.0",
"method": "hostgroup.get",
"params": {
"output": ["groupid", "name"],
},
"id": 1,
"auth": self.getToken()
}
group = requests.post(url=self.url, headers=self.header, data=json.dumps(data))
#print(json.loads(group.content)["result"])
return json.loads(group.content)["result"]
    # Get all host IDs under a single host group
def getHostid(self,gid):
data = {"jsonrpc": "2.0",
"method": "host.get",
"params": {
"output":["hostid","name"],
"groupids": gid,
},
"id": 1,
"auth": self.getToken()
}
ids = requests.post(url=self.url, headers=self.header, data=json.dumps(data))
return json.loads(ids.content)["result"]
    # Get a host ID by IP address
def gethostid(self,ip):
data = {
"jsonrpc": "2.0",
"method": "host.get",
"params": {
"output": ["hostid","name"], #"extend"
"filter": {
"host": ip
}
},
"auth": self.getToken(),
"id": 1
}
hostid = requests.post(url=self.url, headers=self.header, data=json.dumps(data))
return json.loads(hostid.content)['result']
    # Get graph IDs for a given host ID
def getgraphid(self,hostid):
data = {
"jsonrpc": "2.0",
"method": "graph.get",
"params": {
"output": "name",
"hostids": hostid,
"sortfield": "name",
"filter": {
"name": ['调度次数','平均调度时间']
}
},
"auth": self.getToken(),
"id": 1
}
graps = requests.post(url=self.url, headers=self.header, data=json.dumps(data))
return json.loads(graps.content)["result"]
    # Download and save the graph image
def get_graph(self,starttime,dirs,graphid,graphname):
if os.path.exists(dirs):
pass
else:
os.makedirs(dirs)
os.chmod(dirs,stat.S_IRWXO+stat.S_IRWXG+stat.S_IRWXU)
"""
        # urllib raised http.client.IncompleteRead: IncompleteRead(11891 bytes read), hence requests is used below
login_data = urllib.parse.urlencode({
"name": self.username,
"password": self.password,
"autologin": 1,
"enter": "Sign in"}).encode(encoding='UTF8')
graph_args = urllib.parse.urlencode({
"graphid": graphid,
"width": '1200',
"height": '156',
"stime": starttime, # 图形开始时间
"period": '86400'}).encode(encoding='UTF8')
        cj = http.cookiejar.CookieJar()  # cookie handler: stores cookies sent by the server and attaches them to later requests
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
urllib.request.install_opener(opener)
opener.open(login_url, login_data).read()
data = opener.open(gr_url, graph_args).read()
"""
        # Send the requests via the requests library
login_data = {
"name": self.username,
"password": self.password,
"autologin": 1,
"enter": "Sign in"}
        # after urlencode, graph_args looks like graphid=579034&width=599&height=100&stime=1589445134&period=86400
graph_args = urllib.parse.urlencode({
"graphid": graphid,
"width": '599',
"height": '100',
#"stime": starttime, # 图形开始时间 zabbix3.0
#"period": '86400' #zabbix3.0
#zabbix4.0使用下面方式获取某时间段图形
"from": 'now-24h', #2020-05-13 09:00:00
"to": 'now',#2020-05-13 10:00:00
"profileIdx":'web.screens.filter'
})
        # Log in to obtain the session cookie
login_response = requests.post(
url = login_url,
data = login_data
)
cookie_dict = login_response.cookies.get_dict()
newgr_url = gr_url + '?' + graph_args
"""
pic_response = requests.post(
url = newgr_url,
#data = graph_args,
cookies = cookie_dict,
stream = True
)
"""
pic_response = requests.get(newgr_url,cookies=cookie_dict)
data = pic_response.content
with open(r"%s//%s-%s.png" % (dirs,graphname, datetime.datetime.now().strftime('%Y%m%d')), 'wb') as f:
f.write(data)
if __name__ == "__main__":
    url = "http://127.0.0.1:18080/zabbix/api_jsonrpc.php"
    # URL that renders graph images
    gr_url = "http://127.0.0.1:18080/zabbix/chart2.php"
    # login URL
    login_url = 'http://127.0.0.1:18080/zabbix/index.php'
    # request headers
    header = {"Content-Type": "application/json-rpc"}
    # midnight of the current day
    #now = datetime.datetime.now()
    #zeroToday = now - datetime.timedelta(hours=now.hour, minutes=now.minute, seconds=now.second, microseconds=now.microsecond)
    # graph start time: 24 hours ago
    starttime = int(time.mktime((datetime.datetime.now() - datetime.timedelta(days=1)).timetuple()))
    #starttime = int(time.mktime((zeroToday - datetime.timedelta(days=0)).timetuple()))
    # directory where the images are saved
    dirs = r"/home/migu/yqh/%s" % (datetime.datetime.now().strftime('%Y%m%d'))
test = Zabbix(url=url,gr_url=gr_url,login_url=login_url, header=header, username="xxxx", password="xxxxxx")
#ziyan_group=test.getHostgroup()[-1]
#ziyan_host=test.getHostid(ziyan_group['groupid'])
hostid = test.gethostid('127.0.0.1')
print(hostid)
hostid = hostid[0]['hostid']
#print(hostid)
graps = test.getgraphid(hostid)
#print(graps)
for i in graps:
        test.get_graph(starttime, dirs, i['graphid'], i['name'])
        print('Screenshot of %s saved to %s' % (i['name'], dirs))
Zabbix automatic graph screenshots
"""
Crawl the images on http://blog.cuishuai.cc/meizi/ and download them with multiple threads
"""
import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED
import os
def open_url(url):
headers = {'User-Agent':'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Mobile Safari/537.36'}
ret = requests.get(url, headers=headers)
return ret
# Download and save a single image
def save_imag(arg):
    url = arg[0]  # URL of a single image
    path = arg[1]
    filename = url.split('/')[-1]  # the filename is everything after the last /
    filepath = os.path.join(path, filename)
    with open(filepath, 'wb') as f:
        # fetch the image and get its binary data
        res = open_url(url)
        imag = res.content
        f.write(imag)  # write the image to the file
        print(filename + ' downloaded')
# Download images with a thread pool
def download_imag(imag_url, folder='images', flag=False):
if os.path.exists(folder):
pass
else:
os.mkdir(folder)
pool = ThreadPoolExecutor(50)
all_task=[ ]
    if isinstance(imag_url, list):  # if imag_url is a list, download it with the thread pool
for each in imag_url:
#save_imag(each,folder)
tmp =[each,folder]
pool.submit(save_imag, tmp)
#all_task.append(pool.submit(save_imag, tmp))
pool.shutdown(wait=True)
#wait(all_task, return_when=ALL_COMPLETED)
    else:
        save_imag([imag_url, folder])  # single URL: save_imag expects a [url, folder] pair
# Collect all image URLs from the requested page
def get_img_url(url):
    img_list = []  # holds all image URLs
    res = open_url(url)
    soup = BeautifulSoup(res.text, features="lxml")
    html = soup.find_all("img", alt="妹子图")
    for i in html:
        img_url = i['src']  # the image URL
        img_list.append(img_url)
    return img_list
if __name__ == '__main__':
    img_all = []
    pages = 15
    url = 'http://blog.cuishuai.cc/meizi/page_%s.html'
    for i in range(pages):
        newurl = url % i
        img_list = get_img_url(newurl)
        img_all += img_list
    download_imag(img_all)
Multi-threaded image crawling and downloading