import csv
import json
import os
from base64 import b64encode

import requests
# Install with: pip install pycryptodomex (that distribution exposes the
# Cryptodome namespace; plain pycryptodome installs under Crypto instead).
from Cryptodome.Cipher import AES
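# 1. Find the parameters before they are encrypted.
# 2. Work out how to encrypt them (this must follow NetEase's own logic).
# 3. Send the request to NetEase and fetch the comment data.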
# Comment endpoint that the script posts to; the csrf_token query value is left empty.
url="https://music.163.com/weapi/comment/resource/comments/get?csrf_token="
# The request method is POST.
# The site's JS encrypts the comment-request parameters with:
# var bKB4F = window.asrsea(JSON.stringify(i2x), buV3x(["流泪", "强"]), buV3x(Rg7Z.md), buV3x(["爱心", "女孩", "惊恐", "大笑"]));
# The original (unencrypted) i2x parameters are:
# csrf_token: ""
# cursor: "-1"
# offset: "0"
# orderType: "1"
# pageNo: "1"
# pageSize: "20"
# rid: "R_SO_4_574566207"
# threadId: "R_SO_4_574566207"
# Song id typed by the user; it is interpolated into rid/threadId below and
# into the output CSV file name later in the script.
id=input("请输入id")
# Plain (unencrypted) request payload, mirroring the i2x object listed above.
data={
"csrf_token": "",
"cursor": "-1",
"offset": "0",
"orderType": "1",
"pageNo": "1",
"pageSize": "20",
"rid": f"R_SO_4_{id}",
"threadId": f"R_SO_4_{id}"
}
# e and f are the two fixed values that the site's JS passes to its RSA step
# (see the commented-out JS function d further down).
e='010001'
f='00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
# g is the fixed AES key used for the first encryption pass in get_param.
g='0CoJUm6Qyw8W8jud'
# i replaces the 16-character random string of the JS code with one pinned
# value; get_param uses it as the AES key of the second encryption pass.
i='pAy6iJ2UHBz7aRTB'
# Value sent as the "encSecKey" form field of the request.
# NOTE(review): presumably the RSA output the site produced for this fixed i — confirm.
encSecKey = '18c8f576cb3168e9c282825b8d88243b1c525b6ab561b2de9715ce0aaafa7f2da27cc0c180eabe2372646d7b20b116d0ca43c6444d2df6296d4659216f967ffd7ab66d8143fa1212426c00c8205c02cbad94b13cdecc19daa1d8784b32004b4ce9be3d8c967838a5f3a4315efc18115de4c7f6bcf09f3645a89148518b9c93db'
#进行加密
#加密函数
# function a(a) {#随机的16位字符串
# var d, e, b = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", c = "";
# for (d = 0; a > d; d += 1) #循环16次
# e = Math.random() * b.length, #随机数
# e = Math.floor(e),#取整
# c += b.charAt(e);#从字符串中取数
# return c
# }
# function b(a, b) {
# var c = CryptoJS.enc.Utf8.parse(b)
# , d = CryptoJS.enc.Utf8.parse("0102030405060708") #加密密钥
# , e = CryptoJS.enc.Utf8.parse(a) #就是数据
# , f = CryptoJS.AES.encrypt(e, c, { #aes加密算法 c就是加密的密钥
# iv: d,
# mode: CryptoJS.mode.CBC
# });
# return f.toString()
# }
# function c(a, b, c) {
# var d, e;
# return setMaxDigits(131),
# d = new RSAKeyPair(b,"",c),
# e = encryptedString(d, a)
# }
# function d(d, e, f, g) { 参数d就是传入的数据data e使用console传参以后的值为'010001' f使用console计算以后的值为'00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
# var h = {} g='0CoJUm6Qyw8W8jud'
# , i = a(16); #16位的随机值,把i固定其他就固定了
# return h.encText = b(d, g),
# h.encText = b(h.encText, i),返回的就是params的参数,i是加密密钥
# h.encSecKey = c(i, e, f),e与f都是固定的,i是随机的
# h
# }
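# encText is encrypted twice: the data is AES-encrypted with g, then that result is AES-encrypted again with the random string i to give the final value.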
def get_param(data):
    """Build the ``params`` form value by AES-encrypting *data* twice.

    *data* must already be a string (e.g. JSON text), not a dict. The first
    pass uses the module-level key ``g``; its Base64 output is then encrypted
    again with the module-level key ``i``.
    """
    return aes_param(aes_param(data, g), i)
def to16(data):
    """Pad *data* so that its UTF-8 encoding is a multiple of 16 bytes long.

    PKCS#7-style padding: ``pad`` copies of ``chr(pad)`` are appended, where
    ``pad`` is between 1 and 16 (a full 16 is added when the input is already
    aligned).

    The pad length is computed from the encoded byte length rather than the
    character count, because the caller encrypts ``data.encode("utf-8")``;
    counting characters would leave text with multi-byte characters unaligned.
    For pure-ASCII input the result is the same either way.

    Args:
        data: Text to pad.

    Returns:
        The padded text.
    """
    pad = 16 - len(data.encode("utf-8")) % 16
    # chr(pad) is a single byte in UTF-8 for 1 <= pad <= 16, so appending
    # `pad` characters adds exactly `pad` bytes.
    return data + chr(pad) * pad
def aes_param(data, key):
    """AES-CBC encrypt *data* with *key* and return the ciphertext as Base64 text.

    Args:
        data: Plain text; it is padded with ``to16`` before encryption because
            the cipher input length must be a multiple of 16.
        key: AES key as text; it is UTF-8 encoded before use.

    Returns:
        The Base64-encoded ciphertext as a ``str``.
    """
    padded_text = to16(data)
    # The IV is the fixed string used by the site's JS implementation.
    cipher = AES.new(
        key=key.encode("utf-8"),
        mode=AES.MODE_CBC,
        iv="0102030405060708".encode("utf-8"),
    )
    ciphertext = cipher.encrypt(padded_text.encode("utf-8"))
    # Raw ciphertext is not valid UTF-8, so it is Base64-encoded first.
    return b64encode(ciphertext).decode("utf-8")
def get_key():
    """Return the module-level ``encSecKey`` string unchanged."""
    return encSecKey
# POST the encrypted form to the comment endpoint. A timeout keeps the script
# from hanging forever if the server never answers.
requ = requests.post(
    url,
    data={
        "params": get_param(json.dumps(data)),
        "encSecKey": encSecKey,
    },
    timeout=10,
)
print(requ.text)

# Parse the response body once instead of re-parsing it for every comment.
payload = requ.json()

# Regular (non-hot) comments can be read the same way:
# for entry in payload["data"]["comments"]:
#     print(entry["content"])

# "data" or "hotComments" may be missing or null (e.g. on an error response),
# and there may be fewer than 15 hot comments, so fall back to an empty list
# and slice instead of indexing a fixed range.
hot_comments = (payload.get("data") or {}).get("hotComments") or []

# Opening in append mode fails if the target directory does not exist yet.
os.makedirs("网易云热评", exist_ok=True)

# The context manager guarantees the file is flushed and closed. The handle and
# the loop variable use their own names so they do not overwrite the
# module-level constants `f` and `i` (the latter is the AES key get_param uses).
with open(f"网易云热评/{id}.csv", mode="a", encoding="utf-8", newline='') as csv_file:
    csvwriter = csv.writer(csv_file)
    for comment in hot_comments[:15]:
        b = comment['content']
        print(b)
        csvwriter.writerow([b])
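# Inspecting the page showed that the request parameters are produced by
# JavaScript, so the encryption steps were reverse-engineered from the JS
# source. The optimized script can fetch the hot comments of a NetEase Cloud
# Music song from the id in its link and save them locally.
# Detailed tutorial: https://www.bilibili.com/video/BV1ZT4y1d7JM?p=52&spm_id_from=pageDriver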