++++++++++开始线++++++++++++++++++
文章目录
- 一、 cookie
- 1.1 认识cookie
- 1.2 破解cookie
- 1.3 再破解cookie
- 1.4 URLError
- 二、 有关 requests 库
- 2.1 基本属性
- 2.2 添加请求头
- 2.3 自动转译
- 2.4 json
一、 cookie
1.1 认识cookie
01-cookies.py
import urllib.request

# Download the member-centre page WITHOUT sending any cookie and save the
# raw response body to 01cook.html so it can be inspected in a browser.

# 1. Target URL.
url = 'https://www.yaozh.com/member/'

# 2. Request headers: only a browser-like User-Agent is sent.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/70.0.3538.67 Safari/537.36 '
}

# 3. Build the request object.
request = urllib.request.Request(url, headers=headers)

# 4. Send the request and 5. read the body.  The context manager closes
#    the underlying connection even if read() raises.
with urllib.request.urlopen(request) as response:
    data = response.read()
print(type(data))

# Save the bytes unchanged (binary mode) so the result can be verified.
with open('01cook.html', 'wb') as f:
    f.write(data)
01cook.html
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "//www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html class="ready-hide" xmlns="//www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="robots" content="nofollow" />
<title>消息提示</title>
<link type="text/css" rel="stylesheet" href="/public/css/core.css"/>
<link rel="stylesheet" href="//static.yaozh.com/css/app.css">
<link rel="stylesheet" href="/public/css/index.new2.css?t=20180803">
</head>
<script>
//设置domain
document.domain = location.host.replace(/(\w|-)+\./,'');
//全局变量 Global Variables
var GV = {
JS_ROOT : '/public/js/',//js目录
JS_VERSION : "0D0AEBFF77",
TOKEN : "0D0AEBFF77", //token $.ajaxSetup data
FILE_TYPE : "*.gif;*.jpg;*.png;*.zip;*.rar;*.txt;*.doc;*.docx;*.pdf;*.xls;*.xlsx;",
FILE_SIZE : "10 MB",
UID : parseInt(""), //用户空间(参数 : uid)
URL : {
IMAGE : '/images/', //登录地址
QUICK_LOGIN : '/ajax/login/?backurl=/login'
}
};
</script>
<script src="/public/js/yaozh.js?v=20130531"></script>
<script>
Wind.use("jquery",function(){
if(!!window.frameElement){
$('html').addClass('iframe');
}
});
</script> <script>
var config = {
baseUrl : '//static.yaozh.com/js',
deps : ['/public/js/newindex2.js']
}
</script>
<script src="//static.yaozh.com/js/app.js?t=20150610"></script>
<body>
<!-- header -->
<div class="header">
<div class="wrapper">
<div class="header-left">
<a target="_blank" class="item" href="https://news.yaozh.com">新闻资讯</a>
<a target="_blank" class="item" href="https://db.yaozh.com">药智数据</a>
<a target="_blank" class="item" href="https://patent.yaozh.com">专利通</a>
<a target="_blank" class="item" href="https://zx.yaozh.com/?yaozh">药智咨询</a>
<a target="_blank" class="item" href="https://www.yaozh.com/zhihui/?yaozh">药智汇</a>
<a target="_blank" class="item" href="https://s.yaozh.com">药智通</a>
<a target="_blank" class="item" href="https://edu.yaozh.com/">药智学院</a>
<a target="_blank" class="item" href="https://bbs.yaozh.com">论坛交流</a>
<a target="_blank" class="item" href="https://club.yaozh.com/">俱乐部</a>
<a target="_blank" class="item" href="https://www.yaohaiwai.com/?ga_source=www&ga_name=top_navigationbar">海外智通</a>
<a target="_blank" class="item" href="https://gu.yaozh.com/ ">药智谷</a>
<a target="_blank" class="item" href="https://nav.yaozh.com/">药智搜</a>
<a target="_blank" class="item" href="https://yaozh.com/zt/index">专题中心</a>
<a target="_blank" class="item" href="https://job.yaozh.com/">药智人才</a>
</div>
<div class="header-right">
<a class="item" target="_blank" href="//www.yaozh.com/login/">登录</a>
<a target="_blank" class="item" href="//www.yaozh.com/register/">注册</a>
<span class="item app">
<i class="fa"></i>
<span><a target="_blank" href="https://db.yaozh.com/app?ga_source=www&ga_name=sumlink_wwwty_top">药智数据APP</a></span>
<div class="hover-layer">
<div class="box">
<a target="_blank" href="https://itunes.apple.com/cn/app/id1025304074" onclick='ga("send", "event", "button", "click","appdownload_ios_wwwty_top")' target='_blank' class="btn btn-sm ios btn-gray">
<i class="fa"></i>
<span>iOS下载</span>
</a>
<a target="_blank" href="//static.yaozh.com/yaozh_latest.apk" onclick='ga("send", "event", "button", "click","appdownload_android_wwwty_top")' class="btn btn-sm android mt10 btn-green">
<i class="fa"></i>
<span>Android下载</span>
</a>
<div class="tc mt10">
<img src="/public/images/sumscan_wwwty_top.png?_v=1.5.26" alt="" style="width: 150px;">
</div>
</div>
</div>
</span>
</div>
</div>
</div>
<!-- header-bar -->
<div class="header-layer" data-widget="sticky" data-wrapperclass="header-bar">
<form class="wrapper" action="//db.yaozh.com/Search" target='_blank'>
<a target="_blank" href="//www.yaozh.com" class="logo" title="药智网" style="padding: 0;"></a>
<div class="search-box">
<select class="search-type-select ignore-focus" name="btn_jiansuo" data-widget="dropdownSelect">
<option value="1" data-action="//db.yaozh.com/Search" data-key="content">数据库</option>
<option value="2" data-action="//s.yaozh.com/Index/search" data-key="search">药智通</option>
<option value="3" data-action="//www.yaozh.com/list/" data-key="keytitle">药智汇</option>
<!-- <option value="4" data-action="//bbs.yaozh.com/search.php?searchsubmit=yes" data-key="srchtxt">论坛</option> -->
</select>
<input class="search ignore-focus" name="content" type="text">
<a href="javascript:;" class="search-btn">搜索</a>
</div>
<a target="_blank" class="app_link" href="//db.yaozh.com/app">
<img src="/public/images/app_link.gif" alt="">
</a>
</form>
<div class="m-show action-bar">
<a href="javascript:;" class="show-search"><i class="fa"></i></a>
<a href="javascript:;" class="show-menu"><i class="fa"></i></a>
</div>
</div>
<div class="showmsg_warp clearfix">
<div class="showmsg_box warning">
<dl class="showmsgs">
<dt><p class="showmsg_left_bg"></p></dt>
<dd>
<div class="upheight">
<p class="tit">您还未登录!</p>
<p class="con">你可以给<a href="#">小智</a>吐个槽<br>紧急状况请联系药智客服:400-678-0778</p>
</div>
<p class="link"><span id="J_timer" class="Y_red">3</span>秒之后返回,<a id="J_url" data-url="/login" href="/login">如果没有跳转,请点击</a></p>
</dd>
</dl>
</div>
</div>
<div class="footer">
<div class="footer_1200">
<div class="footer_fr">
<div class=""><img width="111" height="111" src="/public/images/scan_wwwty_bottom_1.png?_v=1.5.26" alt=""></div>
<div>
药智数据APP
</div>
</div>
<p>
<a href="http://about.yaozh.com/about.html">关于我们</a><span class="line">|</span>
<a href="http://about.yaozh.com/contact.html">联系我们</a><span class="line">|</span>
<a href="http://about.yaozh.com/qualification.html">企业资质</a><span class="line">|</span>
<a href="http://about.yaozh.com/join.html">诚聘英才</a><span class="line">|</span>
<a href="http://about.yaozh.com/link.html">友情链接</a><span class="line">|</span>
<a href="//help.yaozh.com/" target="_blank">帮助中心</a><span class="line">|</span>
<a href="//bbs.yaozh.com/forum-123-1.html">媒体报道</a><span class="line">|</span>
<a href="//bbs.yaozh.com">药智论坛</a><span class="line">|</span>
<a href="//about.yaozh.com/about/sitemap">全站导航</a><span class="line">|</span>
<a href="//www.yaozh.com/ued?source=www&name=zhihui_footer">用户体验提升计划</a>
<a href="//www.yaozh.com/xuan">业务介绍</a>
</p>
<p>互联网增值电信业务许可证编号:渝B2-20120028 | 渝ICP备10200070号 互联网药品信息服务资格证:(渝)-经营性-2016-0011 渝公网备:50010801500236</p>
<p>康洲大数据 版权所有 Copyright © 2009-2020 药智网YAOZH.COM All Rights Reserved. 法律顾问:上海锦天城(重庆)律师事务所 程建律师
<script>
/*GoogleAnaly*/
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-73321472-5', 'auto');
ga('send', 'pageview');
</script>
<span style="display:none"><script type="text/javascript">
var _bdhmProtocol = (("https:" == document.location.protocol) ? " https://" : " //");
document.write(unescape("%3Cscript src='" + _bdhmProtocol + "hm.baidu.com/h.js%3F65968db3ac154c3089d7f9a4cbb98c94' type='text/javascript'%3E%3C/script%3E"));
</script>
</span>
</p>
<p>客户服务热线:400-678-0778 E-mail:service@yaozh.com 商务合作QQ:845146016</p>
<a id="netsafe" target="_blank" href="http://www.beian.gov.cn/portal/registerSystemInfo?recordcode=50010802001068"><img src="//www.yaozh.com/public/images/netsafe.png"/><p>渝公网安备 50010802001068号</p></a>
<!-- <a id="outer-anquan" key ="58fd5825efbfb064f4599465" logo_size="83x30" logo_type="realname" href="http://www.anquan.org" ><script src="//static.anquan.org/static/outer/js/aq_auth.js"></script></a> -->
<a id="outer-anquan" target="cyxyv" href="https://v.yunaq.com/certificate?domain=www.yaozh.com&from=label&code=90020" rel="nofollow"> <img height="30" src="https://aqyzmedia.yunaq.com/labels/label_sm_90020.png"></a>
<div class="tousu tousu1">
<img src="/public/images/tousu.png" alt="">
<div class="tousu-msg">
<p>投诉热线: 02362308742</p>
<p>邮箱: tousu@yaozh.com </p>
<p> QQ: 914894005</p>
</div>
</div>
</div>
</div>
<script>
(function(){
var bp = document.createElement('script');
var curProtocol = window.location.protocol.split(':')[0];
if (curProtocol === 'https'){
bp.src = 'https://zz.bdstatic.com/linksubmit/push.js';
}
else{
bp.src = 'http://push.zhanzhang.baidu.com/push.js';
}
var s = document.getElementsByTagName("script")[0];
s.parentNode.insertBefore(bp, s);
})();
// 网站底部投诉信息
$(function () {
var tousu=$('.tousu')[0];
var tousumsg=$('.tousu-msg')[0];
tousu.onmouseover=function(){
tousumsg.style.display='block';
}
tousumsg.onmouseout=function(){
tousumsg.style.display='none';
}
function stop(e){
e.stopPropagation();
}
$(tousu).on('click',stop);
$(tousumsg).on('click',stop);
$(document).on('click',function(){
tousumsg.style.display='none';
})
})
</script>
<script>
Wind.use("jquery", "global", function(){
window.history.forward(1);
var timer = parseInt($("#J_timer").html()),
url = $("#J_url").data("url");
function jump(){
if(window.frameElement){
window.frameElement.trigger("refresh");
}else if(url){
window.location.href = url;
}else{
history.go(-1);
}
}
if(!timer){
timer = 3;
}
var m_timer = setInterval(function(){
timer = timer - 1;
if(timer <=0){
jump();
clearInterval(m_timer);
}else{
$("#J_timer").html(timer);
}
},1000); //指定1秒刷新一次
});
</script>
</body>
</html>
在本地浏览器打开01cook.html
1.2 破解cookie
先在浏览器中登录并进入个人中心,按 F12 打开开发者工具,在请求头中找到 Cookie,复制后添加到 headers 中。
01-cookies2.py
"""
直接获取 个人中心的页面
手动粘贴 复制 PC 抓包的 cookies
放在 request对象的请求头里面
"""
import urllib.request
# 1. Target URL: the member-centre page.
url = 'https://www.yaozh.com/member/'
# 2. Request headers: a browser-like User-Agent plus a Cookie header.
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/70.0.3538.67 Safari/537.36 '
,
# The Cookie value is one string assembled from adjacent literals; it was
# copied by hand from the browser after logging in.
# NOTE(review): this is a real session cookie committed in source. It is a
# credential and presumably expires - replace it with a fresh value (and
# keep it out of version control) before running.
'Cookie': 'acw_tc=2f624a2e15971256643447545e7f81ef33b63b4856e8b873b4cc16cfd536b5; '
'PHPSESSID=1cpe128vekurm2cph66psd64l3; _ga=GA1.2.2061322105.1597125665; '
'_gid=GA1.2.2134737262.1597125665; _gat=1; Hm_lvt_65968db3ac154c3089d7f9a4cbb98c94=1597125665; '
'Hm_lpvt_65968db3ac154c3089d7f9a4cbb98c94=1597125669; yaozh_logintime=1597125674; '
'yaozh_user=966627%09linguoqing; yaozh_userId=966627; '
'yaozh_jobstatus=kptta67UcJieW6zKnFSe2JyXnoaabZtrl5uHnKZxanJT1qeSoMZYoNdzaJFyVM'
'%2FO0MjZ09Kg05yHn9ibbHFXpJLUrZCnyqPKhnSqm2linYe42DC0C2f7CB153A9DB6B8F574399B6C9Tlp2bkmmaaJ6Vh5ymcWlyU9WinpiDcdieamqbWmOYnpmSlpmXbpprlpyHnLA%3D6b69834e71bb94c162ef96c9de07166a; db_w_auth=806631%09linguoqing; UtzD_f52b_saltkey=dWE3MyHj; UtzD_f52b_lastvisit=1597122076; UtzD_f52b_lastact=1597125676%09uc.php%09; UtzD_f52b_auth=1cd7AgQ6z%2BljwCPcjsOKGsNXC7G%2B9Sd0L375kOtE1RuiRxgDK%2BOOhkzRfqT9Fpf0V7Uol9YdGivvbUC1NLd%2BVcQ1mxI; yaozh_uidhas=1; yaozh_mylogin=1597125678; acw_tc=2f624a2e15971256643447545e7f81ef33b63b4856e8b873b4cc16cfd536b5 '
}
# 3. Build the request object from the URL and headers defined earlier in
#    this script.
request = urllib.request.Request(url, headers=headers)

# 4. Send the request and 5. read the body.  The context manager closes
#    the connection even if read() raises.
with urllib.request.urlopen(request) as response:
    data = response.read()
print(type(data))

# Save the bytes unchanged (binary mode) so the result can be verified.
with open('01cook.html', 'wb') as f:
    f.write(data)
在本地浏览器打开01cook.html
登陆成功!
1.3 再破解cookie
01-cookies3.py
"""
获取 个人中心的页面
1. 代码登录 登录成功 cookie(有效)
2. 自动带着cookie 去请求个人中心
cookiejar 自动保存这个cookie
"""
import urllib.request
from http import cookiejar
from urllib import parse
# Log in from code, let a CookieJar capture the session cookie, then reuse
# the same opener to request the member-centre page.
#
# Per the original author's note, the backend decides by HTTP method:
# GET on the login URL returns the login page, POST submits the form.

# 1. Log in.
# 1.1 Login URL.
login_url = 'https://www.yaozh.com/login'

# 1.2 Login form fields.
# NOTE(review): real account credentials are hard-coded here; move them to
# environment variables or a local config file before sharing this script.
login_form_data = {
    "username": "874475806@qq.com",  # account
    "pwd": "874475806lin",  # password
    # Copied from the login page.
    # NOTE(review): presumably changes per session - confirm and refresh.
    "formhash": "D7C67699B1",
    # Stored un-encoded on purpose: urlencode() below percent-encodes every
    # value, so a pre-encoded string would be encoded twice ('%' -> '%25').
    "backurl": "https://www.yaozh.com/member/",
}

# 1.3 Build an opener whose cookie processor records cookies set by
#     responses and sends them back on later requests.
cook_jar = cookiejar.CookieJar()
cook_handler = urllib.request.HTTPCookieProcessor(cook_jar)
opener = urllib.request.build_opener(cook_handler)

# Request headers shared by both requests.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
}

# The form must be URL-encoded, and urllib requires POST data as bytes.
login_str = parse.urlencode(login_form_data).encode('utf-8')
# Supplying data= makes urllib send this request as POST.
login_request = urllib.request.Request(login_url, headers=headers, data=login_str)

# Any cookie set by the login response is stored in cook_jar.  The body is
# not needed, so the response is closed immediately.
with opener.open(login_request):
    pass

# 2. Request the member centre through the same opener so the stored
#    cookie is attached.  Pass the Request object (not the bare URL) so the
#    custom headers are actually sent.
center_url = 'https://www.yaozh.com/member/'
center_request = urllib.request.Request(center_url, headers=headers)
with opener.open(center_request) as response:
    # bytes --> str
    data = response.read().decode()

# Open the output file once, with an explicit encoding, and let the
# context manager close it.
with open('02cook.html', 'w', encoding='utf-8') as f:
    f.write(data)
在本地浏览器打开02cook.html
1.4 URLError
# urllib.request reports failures with two exception types:
# HTTPError and URLError.
"""
Sample tracebacks observed for each failure kind:

url = 'https://affdsfsfsdfd.cn'
    raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 8] nodename nor servname provided, or not known>

url = ''
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 404: Not Found
"""
import urllib.error
import urllib.request

# The URL must exist as a real variable; previously it appeared only inside
# the string above, so urlopen(url) failed with NameError.  This is the
# unresolvable host from the first sample.
url = 'https://affdsfsfsdfd.cn'

try:
    response = urllib.request.urlopen(url)
except urllib.error.HTTPError as error:
    # HTTPError is a subclass of URLError, so it must be caught first.
    print(error.code)
except urllib.error.URLError as error:
    print(error)
else:
    # Nothing is read from the response here; release the connection.
    response.close()
二、 有关 requests 库
2.1 基本属性
03-requests_use1.py
# Requires the third-party `requests` package:
#   pip install requests
import requests

url = 'http://www.baidu.com'

# Always pass a timeout: without one, requests can wait indefinitely on a
# server that never answers.
response = requests.get(url, timeout=10)

# `.content` is the raw body as bytes; decode it explicitly.
data = response.content.decode('utf-8')

# `.text` is the body already decoded to str.  It deliberately overwrites
# the value above: the two lines show both ways of getting text.
data = response.text

print(type(data))
print(data)
2.2 添加请求头
03-requests_use2.py
# 1.记得安装 第三方 模块 requests
# pip install requests
import requests
class RequestSpider(object):
    """Fetch the Baidu home page once and expose parts of the exchange.

    The HTTP request is sent in ``__init__``; ``run`` then reads attributes
    of the stored response and prints the request and response cookies.
    """

    def __init__(self):
        """Send the GET request and keep the response on ``self.response``."""
        url = 'https://www.baidu.com'
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/70.0.3538.67 Safari/537.36 '
        }
        # A timeout keeps construction from hanging forever when the server
        # does not answer.
        self.response = requests.get(url, headers=headers, timeout=10)

    def run(self):
        """Read the main response attributes and print both cookie sets.

        The locals that are assigned but not printed are kept on purpose as
        a reference for where each piece of information lives.
        """
        # Raw response body (bytes).
        data = self.response.content

        # 1. Headers of the request that was sent.
        request_headers = self.response.request.headers

        # 2. Headers of the response.
        code_response_headers = self.response.headers

        # 3. HTTP status code of the response.
        code = self.response.status_code

        # 4. Cookies attached to the request.
        # NOTE(review): `_cookies` is a private attribute of the prepared
        # request and may change between requests versions.
        request_cookie = self.response.request._cookies
        print(request_cookie)

        # 5. Cookies set by the response.
        response_cookie = self.response.cookies
        print(response_cookie)


RequestSpider().run()
2.3 自动转译
03-requests_use3.py
# https://www.baidu.com/s?wd=%E7%BE%8E%E5%A5%B3&rsv_spt=1&rsv_iqid=0xefb8b43600013949&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_enter=1&oq=%25E5%25A4%25B4%25E6%259D%25A1&rsv_t=6e3aSjYtw0WgEg7MAIuUlOc3D5lwFBJUVw3KsdkhkWYhZWcNMn9kLBO12GflHlOeUHxx&inputT=506&rsv_pq=81d8f9470001b348&rsv_sug3=19&rsv_sug1=16&rsv_sug7=100&bs=%E5%A4%B4%E6%9D%A1
import requests
# Query parameters passed via `params` are percent-encoded by requests.
# The hand-written equivalent would be: 'https://www.baidu.com/s?wd=美女'
url = 'https://www.baidu.com/s'
params = {
    'wd': "美女"
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/70.0.3538.67 Safari/537.36 '
}

# A timeout prevents the call from blocking forever on an unresponsive server.
response = requests.get(url, headers=headers, params=params, timeout=10)
data = response.content.decode()

# Write with an explicit encoding: the platform default (e.g. a legacy
# Windows code page) may be unable to encode characters on the page.
with open('baidu.html', 'w', encoding='utf-8') as f:
    f.write(data)

# Sending a POST with parameters works the same way:
#   requests.post(url, data={...}, json=...)
2.4 json
03-requests_use4.py
# https://www.baidu.com/s?wd=%E7%BE%8E%E5%A5%B3&rsv_spt=1&rsv_iqid=0xefb8b43600013949&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_enter=1&oq=%25E5%25A4%25B4%25E6%259D%25A1&rsv_t=6e3aSjYtw0WgEg7MAIuUlOc3D5lwFBJUVw3KsdkhkWYhZWcNMn9kLBO12GflHlOeUHxx&inputT=506&rsv_pq=81d8f9470001b348&rsv_sug3=19&rsv_sug1=16&rsv_sug7=100&bs=%E5%A4%B4%E6%9D%A1
import requests
import json
url = 'https://api.github.com/user'
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/70.0.3538.67 Safari/537.36 '
}

# Per the original note, this endpoint returns JSON rather than HTML.
# A timeout prevents the call from blocking forever.
response = requests.get(url, headers=headers, timeout=10)

# Manual alternative, kept for comparison:
#   data = response.content.decode()   # bytes -> str
#   data_dict = json.loads(data)       # str -> dict
#   print(data_dict)

# Response.json() parses the JSON body straight into Python dict/list objects.
data = response.json()
print(type(data))

# NOTE(review): 'message' presumably appears only in some replies (such as
# an unauthenticated call) - confirm.  .get() prints None when the key is
# missing instead of raising KeyError.
print(data.get('message'))
++++++++++结束线++++++++++++++++++