实战 爬取qq音乐
1.项目详情
歌手分区:(a-#)
整个爬虫项目按功能分为爬虫规则和数据入库,分别对应文件 music.py 和 music_db.py。
爬虫规则大致如下:
在歌手列表(https://y.qq.com/portal/singer_list.html)中按照字母类别对歌手进行分类,遍历每个分类下的每位歌手页面,然后获取每位歌手页面下的全部歌曲信息。根据该设计方案列出遍历次数:
1.遍历每个歌手的歌曲页数。
2.遍历每个字母分类的每页歌手信息。
3.遍历每个字母分类的歌手总页数。
4.遍历 26 个字母分类的歌手列表。
在功能上至少需要实现 4 次遍历,但实际开发中往往比这个次数要多。统计遍历次数,主要能让开发者对项目开发有整体的设计逻辑。项目开发使用模块化设计思想,对整个项目模块的划分如下:
1.歌曲下载。
2.歌手信息和歌曲信息。
3.字母分类下的歌手列表。
4.全站歌手列表。
2.导入包,写headers
import requests
import re,json,csv
from urllib import parse
import math
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
# Default HTTP request headers shared by the crawler's requests.
headers = {
    # Browser-like User-Agent string.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36',
    # Referer pointing at the QQ Music site root.
    'Referer': 'https://y.qq.com/',
    # "Do Not Track" request header.
    'DNT': '1',
}
3. 分析歌手分区url
A-----#一共27个
https://y.qq.com/portal/singer_list.html#page=1&index=1&
https://y.qq.com/portal/singer_list.html#page=1&index=27&
这个url很好分析,1-27递增,规律十分明显
4. 分为 27 个进程分别运行,每个分区下属的歌手用线程执行
def myProcess():
    """Crawl all 27 singer partitions (A-Z and #), one worker process each.

    Submits ``get_singer_mid(i)`` for i in 1..27 to a process pool, waits for
    every task, and prints a message for each partition whose worker raised.
    """
    # Split the singers by initial-letter partition and handle them with
    # 27 worker processes.
    with ProcessPoolExecutor(max_workers=27) as p:
        futures = {i: p.submit(get_singer_mid, i) for i in range(1, 28)}
        for i, future in futures.items():
            # Future.exception() blocks until the task is done. Without this
            # check an error raised inside a worker is silently discarded.
            error = future.exception()
            if error is not None:
                print("分区{}处理失败: {!r}".format(i, error))


if __name__ == '__main__':
    myProcess()
5. 歌手mid–用于进入详情页
如果你访问进入每个歌手的详情页,如下图,
你会发现每一个歌手都有一个序号,在qq音乐里面定义为mid,我们应该获取它以达到访问详情页的目的
两种获取方法
1.解析页面(不行)2.获取接口
网页采取ajax交互,所以我们只能采取第二种方法
我们在这里便找到了
我们看下他的headers里的内容
注意这里的data:
注意这里的参数 唯一变的就是sin 和 cur_page
cur_page=1, sin=0
cur_page=2, sin=80
cur_page=3, sin=160
那么这里我们就可以推测只有这两个是必要的,
我们可以在测试文件里写测试一下
我们把url里从data那里截出来,然后加上我们的data
from urllib import parse
import requests
# JSON request payload for the singer-list endpoint: partition index 12,
# offset (sin) 0, cur_page 0.
data='{"comm":{"ct":24,"cv":0},"singerList":{"module":"Music.SingerListServer","method":"get_singer_list","param":{"area":-100,"sex":-100,"genre":-100,"index":12,"sin":0,"cur_page":0}}}'
# Fixed query string followed by the URL-encoded payload. The original text
# had "utf-8¬ice=0": the "&not" of "&notice=0" had been turned into the "¬"
# character, which merged two query parameters into one.
url = (
    'https://u.y.qq.com/cgi-bin/musicu.fcg?-=getUCGI24849744963436082'
    '&g_tk=5381&loginUin=0&hostUin=0&format=json'
    '&inCharset=utf8&outCharset=utf-8&notice=0'
    '&platform=yqq.json&needNewCode=0&data='
    + parse.quote(data)
)
# Call the endpoint and decode the JSON body.
response = requests.get(url)
payload = response.json()
# The singer records sit under singerList -> data -> singerlist.
singers = payload["singerList"]["data"]["singerlist"]
# Print each singer record on its own line.
for record in singers:
    print(record)
输出:
{'country': '', 'singer_id': 352304, 'singer_mid': '002whYns25VtzG', 'singer_name': 'Lost Orchards', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002whYns25VtzG.webp'}
{'country': '', 'singer_id': 975686, 'singer_mid': '003JYrbP32jIl5', 'singer_name': 'Lightnin` Hopkins', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003JYrbP32jIl5.webp'}
{'country': '', 'singer_id': 353182, 'singer_mid': '003XSa3z2ZVq3t', 'singer_name': 'Love You Moon', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003XSa3z2ZVq3t.webp'}
{'country': '', 'singer_id': 1090600, 'singer_mid': '000aJVBC4NAKq6', 'singer_name': 'Lösekes Blues Gang', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000aJVBC4NAKq6.webp'}
{'country': '', 'singer_id': 353721, 'singer_mid': '002S9Dc220UVsr', 'singer_name': 'Long Play 33 1/3', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002S9Dc220UVsr.webp'}
{'country': '', 'singer_id': 1115824, 'singer_mid': '001rYfCf2jwDnZ', 'singer_name': 'Lino Muoio', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001rYfCf2jwDnZ.webp'}
{'country': '', 'singer_id': 348556, 'singer_mid': '001Mu6gx3UA4rc', 'singer_name': 'Loren Kate', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001Mu6gx3UA4rc.webp'}
{'country': '', 'singer_id': 356934, 'singer_mid': '004Q6iS21odLvn', 'singer_name': "Lil' Nathan", 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000004Q6iS21odLvn.webp'}
{'country': '', 'singer_id': 1062499, 'singer_mid': '002GKWTA1Gm5k7', 'singer_name': 'Larissa Ndi', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002GKWTA1Gm5k7.webp'}
{'country': '', 'singer_id': 350973, 'singer_mid': '000jmiZb0M0gCF', 'singer_name': 'Lastkaj 14', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000jmiZb0M0gCF.webp'}
{'country': '', 'singer_id': 1096536, 'singer_mid': '001LFiYj4POls5', 'singer_name': 'Luther Johnson', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001LFiYj4POls5.webp'}
{'country': '', 'singer_id': 351275, 'singer_mid': '002IJo1503iqrX', 'singer_name': 'La Masa', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002IJo1503iqrX.webp'}
{'country': '', 'singer_id': 1167231, 'singer_mid': '002gvvrz2RSJrO', 'singer_name': 'Low Society', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002gvvrz2RSJrO.webp'}
{'country': '', 'singer_id': 351864, 'singer_mid': '0022S1HK3jRgek', 'singer_name': "Let's Talk About Trees", 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000022S1HK3jRgek.webp'}
{'country': '', 'singer_id': 352126, 'singer_mid': '002jEKq34BPSNN', 'singer_name': 'Lena Hughes', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002jEKq34BPSNN.webp'}
{'country': '', 'singer_id': 90217, 'singer_mid': '003NOBHO0CammO', 'singer_name': '李勇', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003NOBHO0CammO.webp'}
{'country': '', 'singer_id': 23981, 'singer_mid': '001NpcYm0gcJd7', 'singer_name': '竜鉄也', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001NpcYm0gcJd7.webp'}
{'country': '', 'singer_id': 1076316, 'singer_mid': '0017SCQb3I5CkW', 'singer_name': 'Lewis Del Mar', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000017SCQb3I5CkW.webp'}
{'country': '', 'singer_id': 2493, 'singer_mid': '002UW6064I0GvC', 'singer_name': "L'Algerino", 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002UW6064I0GvC.webp'}
{'country': '', 'singer_id': 99697, 'singer_mid': '003Y92mD3LCk3z', 'singer_name': "L'Amitie", 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003Y92mD3LCk3z.webp'}
{'country': '', 'singer_id': 5631, 'singer_mid': '000PZ0Te2abtdX', 'singer_name': "L'Arc〜en〜Ciel (彩虹乐队)", 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000PZ0Te2abtdX.webp'}
{'country': '', 'singer_id': 63100, 'singer_mid': '000Htjk71ik8ce', 'singer_name': "L'arte del mondo/Pera Ensemble", 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000Htjk71ik8ce.webp'}
{'country': '', 'singer_id': 99528, 'singer_mid': '004D5j0M0bPhA8', 'singer_name': "L'Ham De Foc", 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000004D5j0M0bPhA8.webp'}
{'country': '', 'singer_id': 95262, 'singer_mid': '003w8J722tMYsN', 'singer_name': "l'Occidentale de Fanfare", 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003w8J722tMYsN.webp'}
{'country': '', 'singer_id': 104913, 'singer_mid': '003OUmgT21eahJ', 'singer_name': "L'Odyssey", 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003OUmgT21eahJ.webp'}
{'country': '', 'singer_id': 23726, 'singer_mid': '000TE5pG1HPFfe', 'singer_name': 'L-R', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000TE5pG1HPFfe.webp'}
{'country': '', 'singer_id': 102123, 'singer_mid': '001xzB1q4b2j76', 'singer_name': 'L. Vaidyanatham', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001xzB1q4b2j76.webp'}
{'country': '', 'singer_id': 46468, 'singer_mid': '002xXdPJ4QABLM', 'singer_name': 'L. Young', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002xXdPJ4QABLM.webp'}
{'country': '', 'singer_id': 5747, 'singer_mid': '0029BfF42PJ7IN', 'singer_name': 'L.A四贱客', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000029BfF42PJ7IN.webp'}
{'country': '', 'singer_id': 242431, 'singer_mid': '002OnmDE44fB7P', 'singer_name': 'L.BOW GREASE', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002OnmDE44fB7P.webp'}
{'country': '', 'singer_id': 122116, 'singer_mid': '001OHJul0Rcn3w', 'singer_name': 'L.C. Williams', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001OHJul0Rcn3w.webp'}
{'country': '', 'singer_id': 224008, 'singer_mid': '000fQkuh2OohkC', 'singer_name': 'L.J. Reynolds', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000fQkuh2OohkC.webp'}
{'country': '', 'singer_id': 23723, 'singer_mid': '001AUTEJ2uowA7', 'singer_name': 'L.L BROTHERS', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001AUTEJ2uowA7.webp'}
{'country': '', 'singer_id': 965343, 'singer_mid': '00004Vei3TdW8H', 'singer_name': 'L.M. Abraham', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M00000004Vei3TdW8H.webp'}
{'country': '', 'singer_id': 112123, 'singer_mid': '0032ZIkw3vZz6E', 'singer_name': 'L.T.D.', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000032ZIkw3vZz6E.webp'}
{'country': '', 'singer_id': 131124, 'singer_mid': '000FueZH3q2Vhk', 'singer_name': 'L.V. Johnson', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000FueZH3q2Vhk.webp'}
{'country': '', 'singer_id': 1686, 'singer_mid': '004Y8MFl15xfmG', 'singer_name': 'L7', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000004Y8MFl15xfmG.webp'}
{'country': '', 'singer_id': 718996, 'singer_mid': '000Ntnbr1JfDlj', 'singer_name': 'La Banda Municipal Del Polo Norte', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000Ntnbr1JfDlj.webp'}
{'country': '', 'singer_id': 95222, 'singer_mid': '002485874KF2hE', 'singer_name': 'La banda municipale de Santiago de Cuba', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002485874KF2hE.webp'}
{'country': '', 'singer_id': 259576, 'singer_mid': '002b6dUV4KyeSJ', 'singer_name': 'La banda Wagliò', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002b6dUV4KyeSJ.webp'}
{'country': '', 'singer_id': 3143, 'singer_mid': '000NhYnj0JciGJ', 'singer_name': 'La Barberia Del Sur', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000NhYnj0JciGJ.webp'}
{'country': '', 'singer_id': 101056, 'singer_mid': '003Y9Vx52fAf0E', 'singer_name': 'La Beat', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003Y9Vx52fAf0E.webp'}
{'country': '', 'singer_id': 243520, 'singer_mid': '004KGxGc3R84EI', 'singer_name': 'La Beniterrània', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000004KGxGc3R84EI.webp'}
{'country': '', 'singer_id': 258173, 'singer_mid': '004TOMV13KqomU', 'singer_name': 'La Blues Band de Granada', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000004TOMV13KqomU.webp'}
{'country': '', 'singer_id': 16785, 'singer_mid': '003xEmxg0fg9xt', 'singer_name': 'La Bottine Souriante', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003xEmxg0fg9xt.webp'}
{'country': '', 'singer_id': 115416, 'singer_mid': '001tlc9Q0xxVc1', 'singer_name': 'La Bullonera', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001tlc9Q0xxVc1.webp'}
{'country': '', 'singer_id': 233787, 'singer_mid': '003ESnW63JLMJ0', 'singer_name': 'La Búsqueda', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003ESnW63JLMJ0.webp'}
{'country': '', 'singer_id': 1998, 'singer_mid': '002Qomfp3ewNK0', 'singer_name': 'La Cabra Mecanica', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002Qomfp3ewNK0.webp'}
{'country': '', 'singer_id': 234713, 'singer_mid': '0001JDUH2HUcig', 'singer_name': 'La Cacharrera', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000001JDUH2HUcig.webp'}
{'country': '', 'singer_id': 251902, 'singer_mid': '002OtvQN1yMHhM', 'singer_name': 'La Coixinera', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002OtvQN1yMHhM.webp'}
{'country': '', 'singer_id': 232878, 'singer_mid': '000YWbDQ49e9Qi', 'singer_name': 'La Coloma i el Puma', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000YWbDQ49e9Qi.webp'}
{'country': '', 'singer_id': 1127, 'singer_mid': '0006jIPv0KSzxG', 'singer_name': 'La Costumbre', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000006jIPv0KSzxG.webp'}
{'country': '', 'singer_id': 3166, 'singer_mid': '001xFyxE1hfxeS', 'singer_name': 'La Crus', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001xFyxE1hfxeS.webp'}
{'country': '', 'singer_id': 3010, 'singer_mid': '002Pigx306DTbs', 'singer_name': 'La Dama Se Esconde', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002Pigx306DTbs.webp'}
{'country': '', 'singer_id': 239904, 'singer_mid': '001zHCcz3pUhWP', 'singer_name': 'La Despedida De Soltero De...', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001zHCcz3pUhWP.webp'}
{'country': '', 'singer_id': 240207, 'singer_mid': '003KMX4R3jNRi5', 'singer_name': 'LA despedida De Soltero Del Miguel', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003KMX4R3jNRi5.webp'}
{'country': '', 'singer_id': 95301, 'singer_mid': '0041bF4e176Amx', 'singer_name': 'La Fabrique', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000041bF4e176Amx.webp'}
{'country': '', 'singer_id': 268543, 'singer_mid': '004XXHwp3jaJou', 'singer_name': 'La Femme Belge', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000004XXHwp3jaJou.webp'}
{'country': '', 'singer_id': 102544, 'singer_mid': '000sgb702FQeuV', 'singer_name': 'La Gran Orquesta Republicana', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000sgb702FQeuV.webp'}
{'country': '', 'singer_id': 238805, 'singer_mid': '000GVua91zY8U6', 'singer_name': 'La Gresca', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000GVua91zY8U6.webp'}
{'country': '', 'singer_id': 3179, 'singer_mid': '0022rCwo2DRHH4', 'singer_name': 'La Ivanne', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000022rCwo2DRHH4.webp'}
{'country': '', 'singer_id': 266545, 'singer_mid': '003u0HW11vn68H', 'singer_name': 'La Jodedera', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003u0HW11vn68H.webp'}
{'country': '', 'singer_id': 276503, 'singer_mid': '000QrPh00vqnZZ', 'singer_name': 'La Jose', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000QrPh00vqnZZ.webp'}
{'country': '', 'singer_id': 1132, 'singer_mid': '002Ctxam0qRkqv', 'singer_name': 'La Ley', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002Ctxam0qRkqv.webp'}
{'country': '', 'singer_id': 265185, 'singer_mid': '004Am7iZ284MTe', 'singer_name': 'La Manouche', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000004Am7iZ284MTe.webp'}
{'country': '', 'singer_id': 252635, 'singer_mid': '002TDMjP3aSdGW', 'singer_name': 'La Metralli', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002TDMjP3aSdGW.webp'}
{'country': '', 'singer_id': 99516, 'singer_mid': '002yg5Nd2pt7bi', 'singer_name': 'La MusganA', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002yg5Nd2pt7bi.webp'}
{'country': '', 'singer_id': 114927, 'singer_mid': '0037i9kC3XjuZi', 'singer_name': 'La Pena Negra', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000037i9kC3XjuZi.webp'}
{'country': '', 'singer_id': 105207, 'singer_mid': '003pomBG1g3uKA', 'singer_name': 'La Rural Company', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003pomBG1g3uKA.webp'}
{'country': '', 'singer_id': 62939, 'singer_mid': '0045yvAv3gbOhk', 'singer_name': 'La Simphonie du Marais', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000045yvAv3gbOhk.webp'}
{'country': '', 'singer_id': 2103, 'singer_mid': '001O5nzn38tdo0', 'singer_name': 'La Sonora Altepexana', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001O5nzn38tdo0.webp'}
{'country': '', 'singer_id': 1541, 'singer_mid': '002gyWK834Cxbx', 'singer_name': 'La Sonora de Margarita', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002gyWK834Cxbx.webp'}
{'country': '', 'singer_id': 233850, 'singer_mid': '003vik191b0euv', 'singer_name': 'La Soul Machine', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003vik191b0euv.webp'}
{'country': '', 'singer_id': 95111, 'singer_mid': '000SRdeE4awDvY', 'singer_name': 'La Squadra', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000SRdeE4awDvY.webp'}
{'country': '', 'singer_id': 1031637, 'singer_mid': '001gMVEi1UQVI8', 'singer_name': 'La Teoría del Taburete', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001gMVEi1UQVI8.webp'}
{'country': '', 'singer_id': 51168, 'singer_mid': '001cDk853nJ6J8', 'singer_name': 'La Toya Jackson', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001cDk853nJ6J8.webp'}
{'country': '', 'singer_id': 3580, 'singer_mid': '003hrO3f4JhMhR', 'singer_name': 'La Union', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003hrO3f4JhMhR.webp'}
{'country': '', 'singer_id': 108355, 'singer_mid': '002J8gr246mcpZ', 'singer_name': 'La Vogue', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002J8gr246mcpZ.webp'}
{'country': '', 'singer_id': 234715, 'singer_mid': '003f5Nft42oOo3', 'singer_name': 'La Zambra', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003f5Nft42oOo3.webp'}
{'country': '', 'singer_id': 100593, 'singer_mid': '004Nsm3X33v6Mv', 'singer_name': "La' Bre", 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000004Nsm3X33v6Mv.webp'}
我们如果把这里的 cur_page 和 sin 分别改为 2 和 80,一样可以输出
from urllib import parse
import requests
# JSON request payload for the singer-list endpoint: partition index 12,
# offset (sin) 80, cur_page 2.
data='{"comm":{"ct":24,"cv":0},"singerList":{"module":"Music.SingerListServer","method":"get_singer_list","param":{"area":-100,"sex":-100,"genre":-100,"index":12,"sin":80,"cur_page":2}}}'
# Fixed query string followed by the URL-encoded payload. The original text
# had "utf-8¬ice=0": the "&not" of "&notice=0" had been turned into the "¬"
# character, which merged two query parameters into one.
url = (
    'https://u.y.qq.com/cgi-bin/musicu.fcg?-=getUCGI24849744963436082'
    '&g_tk=5381&loginUin=0&hostUin=0&format=json'
    '&inCharset=utf8&outCharset=utf-8&notice=0'
    '&platform=yqq.json&needNewCode=0&data='
    + parse.quote(data)
)
# Call the endpoint and decode the JSON body.
response = requests.get(url)
payload = response.json()
# The singer records sit under singerList -> data -> singerlist.
singers = payload["singerList"]["data"]["singerlist"]
# Print each singer record on its own line.
for record in singers:
    print(record)
输出:
{'country': '', 'singer_id': 719145, 'singer_mid': '001iY3lP2FjrZL', 'singer_name': "La' Keisha", 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001iY3lP2FjrZL.webp'}
{'country': '', 'singer_id': 2081, 'singer_mid': '001ilEfs352SYO', 'singer_name': 'Laaksonen, Petri', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001ilEfs352SYO.webp'}
{'country': '', 'singer_id': 34286, 'singer_mid': '001LoVEK28eeXx', 'singer_name': 'Laas Unltd', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001LoVEK28eeXx.webp'}
{'country': '', 'singer_id': 96013, 'singer_mid': '002tPUM43tDL51', 'singer_name': 'Labakie Gadi', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002tPUM43tDL51.webp'}
{'country': '', 'singer_id': 241163, 'singer_mid': '000idujx3X11qe', 'singer_name': 'Labhrás Ó Cadhla', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000idujx3X11qe.webp'}
{'country': '', 'singer_id': 11514, 'singer_mid': '0008w8dX3ezkqG', 'singer_name': '蜡笔小新', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000008w8dX3ezkqG.webp'}
{'country': '', 'singer_id': 1482, 'singer_mid': '003c8VBt3PaygH', 'singer_name': 'Lace', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003c8VBt3PaygH.webp'}
{'country': '', 'singer_id': 14537, 'singer_mid': '000OaCmg44MOXd', 'singer_name': 'LaceDolL', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000OaCmg44MOXd.webp'}
{'country': '', 'singer_id': 220840, 'singer_mid': '0029qfC71q71nB', 'singer_name': 'Lacee', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000029qfC71q71nB.webp'}
{'country': '', 'singer_id': 95933, 'singer_mid': '000cx85Q3tUYGP', 'singer_name': 'Lach', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000cx85Q3tUYGP.webp'}
{'country': '', 'singer_id': 109849, 'singer_mid': '0019R19B3Typ80', 'singer_name': 'Laco Tayfa', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000019R19B3Typ80.webp'}
{'country': '', 'singer_id': 1020006, 'singer_mid': '000bKeSL3SdSvH', 'singer_name': 'Lacy Cavalier', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000bKeSL3SdSvH.webp'}
{'country': '', 'singer_id': 17669, 'singer_mid': '000attJH0WL2Tz', 'singer_name': 'Lacy J Dalton', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000attJH0WL2Tz.webp'}
{'country': '', 'singer_id': 142057, 'singer_mid': '0008N65B397Z20', 'singer_name': 'Lacy Younger', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000008N65B397Z20.webp'}
{'country': '', 'singer_id': 51170, 'singer_mid': '002w2WMz2Jpwvp', 'singer_name': 'Ladae!', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002w2WMz2Jpwvp.webp'}
{'country': '', 'singer_id': 157870, 'singer_mid': '000n4ITH1fLwk5', 'singer_name': "LADIES' CODE (레이디스 코드)", 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000n4ITH1fLwk5.webp'}
{'country': '', 'singer_id': 13619, 'singer_mid': '004LZp7y0uLPAT', 'singer_name': 'Lady & Bird', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000004LZp7y0uLPAT.webp'}
{'country': '', 'singer_id': 12855, 'singer_mid': '0046CEfN3m9hyj', 'singer_name': 'Lady Antebellum (战前女神)', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000046CEfN3m9hyj.webp'}
{'country': '', 'singer_id': 13769, 'singer_mid': '001D3C3T2wxP74', 'singer_name': 'Lady Gaga', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001D3C3T2wxP74.webp'}
{'country': '', 'singer_id': 104832, 'singer_mid': '001zFdwc0aPvqa', 'singer_name': 'Lady I', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001zFdwc0aPvqa.webp'}
{'country': '', 'singer_id': 54567, 'singer_mid': '000csTDM4Tip2O', 'singer_name': 'Lady L', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000csTDM4Tip2O.webp'}
{'country': '', 'singer_id': 103231, 'singer_mid': '004Qz6mR3WsEzi', 'singer_name': 'Lady Libra', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000004Qz6mR3WsEzi.webp'}
{'country': '', 'singer_id': 233433, 'singer_mid': '000IxQxG0AMyca', 'singer_name': 'Lady P', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000IxQxG0AMyca.webp'}
{'country': '', 'singer_id': 2072, 'singer_mid': '002RrV4y1FYw1y', 'singer_name': 'Lady Tom', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002RrV4y1FYw1y.webp'}
{'country': '', 'singer_id': 140255, 'singer_mid': '0041yfuX0vKbeu', 'singer_name': 'Lady Tracey', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000041yfuX0vKbeu.webp'}
{'country': '', 'singer_id': 103909, 'singer_mid': '0012D0YD3c5XCv', 'singer_name': 'Ladykillaz', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000012D0YD3c5XCv.webp'}
{'country': '', 'singer_id': 2324, 'singer_mid': '002Tq4lP1SsF61', 'singer_name': 'Ladysmith Black Mambazo', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002Tq4lP1SsF61.webp'}
{'country': '', 'singer_id': 99391, 'singer_mid': '0046WHZ010gK9F', 'singer_name': 'Lafayette Gilchrist', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000046WHZ010gK9F.webp'}
{'country': '', 'singer_id': 13432, 'singer_mid': '004eucp11pNmRA', 'singer_name': 'Lafee', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000004eucp11pNmRA.webp'}
{'country': '', 'singer_id': 13277, 'singer_mid': '002Y8BCL0sEpTe', 'singer_name': 'Laguardia', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002Y8BCL0sEpTe.webp'}
{'country': '', 'singer_id': 63166, 'singer_mid': '002cQ8Qw1eLL3m', 'singer_name': 'Lahti Symphony Orchestra', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002cQ8Qw1eLL3m.webp'}
{'country': '', 'singer_id': 19466, 'singer_mid': '003yUcCI3BpQ6e', 'singer_name': '来吧!焙焙!', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003yUcCI3BpQ6e.webp'}
{'country': '', 'singer_id': 158227, 'singer_mid': '001oE1ZH39fYjI', 'singer_name': '瀬川英史 (せがわ えいし)', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001oE1ZH39fYjI.webp'}
{'country': '', 'singer_id': 23978, 'singer_mid': '003ZXwAK13bLEq', 'singer_name': '瀬川瑛子 (せがわ えいこ)', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003ZXwAK13bLEq.webp'}
{'country': '', 'singer_id': 324, 'singer_mid': '00176BcW48qLeV', 'singer_name': '赖惠存', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M00000176BcW48qLeV.webp'}
{'country': '', 'singer_id': 46554, 'singer_mid': '0037JloU2WlwIF', 'singer_name': 'Laila Angell', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000037JloU2WlwIF.webp'}
{'country': '', 'singer_id': 81394, 'singer_mid': '001DtoDL2gqVs8', 'singer_name': 'Laila Dalseth', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001DtoDL2gqVs8.webp'}
{'country': '', 'singer_id': 96010, 'singer_mid': '001xiMPa05nevP', 'singer_name': 'Laimonis Beginskis', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001xiMPa05nevP.webp'}
{'country': '', 'singer_id': 18388, 'singer_mid': '000M61U500c1nY', 'singer_name': 'Laine', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000M61U500c1nY.webp'}
{'country': '', 'singer_id': 26993, 'singer_mid': '00009pOq3JiVQG', 'singer_name': 'Lainey Lou (莱妮·露)', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M00000009pOq3JiVQG.webp'}
{'country': '', 'singer_id': 3758, 'singer_mid': '004fQ7q30mSD4Z', 'singer_name': '赖伟锋', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000004fQ7q30mSD4Z.webp'}
{'country': '', 'singer_id': 1327, 'singer_mid': '003vNZib2tBHUv', 'singer_name': 'LaVern Baker', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003vNZib2tBHUv.webp'}
{'country': '', 'singer_id': 4672, 'singer_mid': '001Q3BmS4O6H0C', 'singer_name': '赖雅妍', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001Q3BmS4O6H0C.webp'}
{'country': '', 'singer_id': 262643, 'singer_mid': '000NE5ew4aYcDU', 'singer_name': 'Lakatos György és cigányzenekara', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000NE5ew4aYcDU.webp'}
{'country': '', 'singer_id': 110126, 'singer_mid': '00019Ejo3tBcor', 'singer_name': 'Lakhi Banzara', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M00000019Ejo3tBcor.webp'}
{'country': '', 'singer_id': 81402, 'singer_mid': '002A4wgQ3mSZTo', 'singer_name': 'Lakis Pappas', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002A4wgQ3mSZTo.webp'}
{'country': '', 'singer_id': 18123, 'singer_mid': '003lkeaP3UsQWG', 'singer_name': 'Lakissova', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003lkeaP3UsQWG.webp'}
{'country': '', 'singer_id': 145525, 'singer_mid': '003yU6dW0VlCfU', 'singer_name': 'Lakita', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003yU6dW0VlCfU.webp'}
{'country': '', 'singer_id': 167372, 'singer_mid': '001kA97W44dpK6', 'singer_name': 'Lala Romero', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001kA97W44dpK6.webp'}
{'country': '', 'singer_id': 106194, 'singer_mid': '0016zaCe25NawT', 'singer_name': 'Lalann', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000016zaCe25NawT.webp'}
{'country': '', 'singer_id': 107701, 'singer_mid': '000fb2Vn32VlJv', 'singer_name': 'Lalita Phadke', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000fb2Vn32VlJv.webp'}
{'country': '', 'singer_id': 109897, 'singer_mid': '002VJrLC4CPDiD', 'singer_name': 'Lalo Olivares', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002VJrLC4CPDiD.webp'}
{'country': '', 'singer_id': 179473, 'singer_mid': '003rmYsr0oQf4N', 'singer_name': 'Lamar Jay', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003rmYsr0oQf4N.webp'}
{'country': '', 'singer_id': 31254, 'singer_mid': '004eEPxA4byuzg', 'singer_name': 'Lambchop', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000004eEPxA4byuzg.webp'}
{'country': '', 'singer_id': 81411, 'singer_mid': '000P98y9421oY0', 'singer_name': 'Lambda', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000P98y9421oY0.webp'}
{'country': '', 'singer_id': 1123, 'singer_mid': '003a88HZ3ccJH9', 'singer_name': 'Lambe Alabakoski', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003a88HZ3ccJH9.webp'}
{'country': '', 'singer_id': 62621, 'singer_mid': '002m40xW0bszhA', 'singer_name': 'Lambert Hendricks & Ross', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002m40xW0bszhA.webp'}
{'country': '', 'singer_id': 258956, 'singer_mid': '001PuFYj3Wn1ta', 'singer_name': 'Lampros Xalkias', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001PuFYj3Wn1ta.webp'}
{'country': '', 'singer_id': 5705, 'singer_mid': '003I3fpP3RZC13', 'singer_name': '嵐 (あらし)', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003I3fpP3RZC13.webp'}
{'country': '', 'singer_id': 36434, 'singer_mid': '0040HbAd0HG7N5', 'singer_name': 'Lana Del Rey (拉娜·德雷)', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M0000040HbAd0HG7N5.webp'}
{'country': '', 'singer_id': 221371, 'singer_mid': '000i8jym4UWVml', 'singer_name': 'Lana Martino-Smith', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000i8jym4UWVml.webp'}
{'country': '', 'singer_id': 41062, 'singer_mid': '003mJSVc2879gO', 'singer_name': 'Lanate', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003mJSVc2879gO.webp'}
{'country': '', 'singer_id': 32048, 'singer_mid': '000RIxZJ19KYRJ', 'singer_name': '蓝波', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000RIxZJ19KYRJ.webp'}
{'country': '', 'singer_id': 33824, 'singer_mid': '004SBAsU3tkMlY', 'singer_name': 'Lance De Sardi', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000004SBAsU3tkMlY.webp'}
{'country': '', 'singer_id': 45825, 'singer_mid': '003wj62c4S0h3Z', 'singer_name': 'Lance Ellis', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003wj62c4S0h3Z.webp'}
{'country': '', 'singer_id': 16202, 'singer_mid': '001i5W9U3dpFxn', 'singer_name': 'Lance Miller', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001i5W9U3dpFxn.webp'}
{'country': '', 'singer_id': 106805, 'singer_mid': '004RDNr33z5IuZ', 'singer_name': 'Landscape Body Machine', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000004RDNr33z5IuZ.webp'}
{'country': '', 'singer_id': 1797, 'singer_mid': '003YeFXn40ciXM', 'singer_name': 'Lane Turner', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003YeFXn40ciXM.webp'}
{'country': '', 'singer_id': 101255, 'singer_mid': '001QQTpv3I82xD', 'singer_name': 'Langas And Manganiars', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001QQTpv3I82xD.webp'}
{'country': '', 'singer_id': 1008544, 'singer_mid': '003E4K1K3rUfRE', 'singer_name': '朗嘎拉姆', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003E4K1K3rUfRE.webp'}
{'country': '', 'singer_id': 1590, 'singer_mid': '002U1A1j0hnmta', 'singer_name': '浪花兄弟', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002U1A1j0hnmta.webp'}
{'country': '', 'singer_id': 41038, 'singer_mid': '000SNaqQ0kQRBI', 'singer_name': '郎军(渔渊)', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000SNaqQ0kQRBI.webp'}
{'country': '', 'singer_id': 8524, 'singer_mid': '001A2KKg0I4LmF', 'singer_name': '郎朗', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001A2KKg0I4LmF.webp'}
{'country': '', 'singer_id': 263491, 'singer_mid': '000WmJo60zM6CT', 'singer_name': 'Language of Shapes', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000WmJo60zM6CT.webp'}
{'country': '', 'singer_id': 96471, 'singer_mid': '002vUV2L396gzK', 'singer_name': 'Languis', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000002vUV2L396gzK.webp'}
{'country': '', 'singer_id': 6326, 'singer_mid': '001HV2B00BZzLw', 'singer_name': '兰红', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001HV2B00BZzLw.webp'}
{'country': '', 'singer_id': 6900, 'singer_mid': '001pwXAF3TSDFY', 'singer_name': '蓝狐乐队', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001pwXAF3TSDFY.webp'}
{'country': '', 'singer_id': 34264, 'singer_mid': '001mSbzt11qDjG', 'singer_name': '蓝井艾露 (藍井エイル)', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000001mSbzt11qDjG.webp'}
{'country': '', 'singer_id': 15116, 'singer_mid': '000GvpHg2IYcJV', 'singer_name': '兰卡措', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000000GvpHg2IYcJV.webp'}
{'country': '', 'singer_id': 36944, 'singer_mid': '003Rzyhv3TJ2Mh', 'singer_name': 'Lano', 'singer_pic': 'http://y.gtimg.cn/music/photo_new/T001R150x150M000003Rzyhv3TJ2Mh.webp'}
看输出结果的话,我们的推测是正确的
6. 获取页面总数
由上面说的,我们已经推测出了它的详情页的 url,那么下一步我们只需要获取页面总数就可以了。你可以利用页面的 html 爬取到尾页,当然也可以用返回数据里的 total(歌手总数)除以每页的歌手数量,得到的自然就是总页数 pages 了
# Round up, not down: when total is not a multiple of 80 the last page is
# only partially filled and must still be counted.
pages=int(math.ceil(total/80))
因为每页有 80 个歌手,用歌手总数除以 80 再取整就能得到页数。注意:当 total 不是 80 的整数倍时,向下取整会漏掉最后不满 80 人的那一页,此时应当向上取整(math.ceil)
7封装函数:
def get_singer_mid(i):
    """Crawl every singer listed under letter partition ``i``.

    Requests the singer-list endpoint once to read the partition's ``total``
    singer count, then requests each page (80 singers per page) and submits
    one ``get_singer_data`` task per singer to a thread pool.

    Args:
        i: partition index sent as the ``index`` request parameter (1-27).
    """
    page_size = 80  # singers per page; ``sin`` advances by this amount
    base_url = ('https://u.y.qq.com/cgi-bin/musicu.fcg?-=getUCGI24849744963436082'
                '&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8'
                '&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data=')
    payload = ('{"comm":{"ct":24,"cv":0},"singerList":{"module":"Music.SingerListServer",'
               '"method":"get_singer_list","param":{"area":-100,"sex":-100,"genre":-100,'
               '"index":%s,"sin":%s,"cur_page":%s}}}')

    print("正在访问分区{}".format(i))
    # First request only serves to learn how many singers the partition holds.
    response = requests.get(base_url + parse.quote(payload % (i, 0, 0)), headers=headers)
    total = response.json()["singerList"]["data"]["total"]
    # Round up so the final, partially filled page is not dropped.
    pages = int(math.ceil(total / page_size))
    print("当前分区{},一共{}个歌手,一共{}页".format(i, total, pages))

    # max_workers must be >= 1; the context manager waits for all submitted
    # singer tasks before this function returns.
    with ThreadPoolExecutor(max_workers=max(1, pages)) as thredpool:
        for page in range(1, pages + 1):
            sin = (page - 1) * page_size  # page 1 -> sin 0, page 2 -> sin 80, ...
            url = base_url + parse.quote(payload % (i, sin, page))
            response = requests.get(url, headers=headers)
            sings = response.json()["singerList"]["data"]["singerlist"]
            for sing in sings:
                # singer_mid identifies the singer's detail page / song list.
                thredpool.submit(
                    get_singer_data,
                    mid=sing["singer_mid"],
                    singer_name=sing["singer_name"],
                    singer_pic=sing["singer_pic"],
                    singer_country=sing["country"],
                )
7. 根据mid码访问歌手详情页
有了mid码我们就可以访问详情页了
url="https://y.qq.com/n/yqq/singer/{}.html".format(mid)
网页默认只能获取前10首歌曲,我们该怎么获取其他歌曲呢?歌曲翻页问题如何解决?
我们采取老办法,寻找接口和传参数据:
在text里编写测试:
from urllib import parse
import requests
# Probe the singer-song-list endpoint: request the first 10 songs of one
# singer (begin=0, num=10) and print every raw song record.
data = ('{"comm":{"ct":24,"cv":0},"singerSongList":{"method":"GetSingerSongList",'
        '"param":{"order":1,"singerMid":"0025NhlN2yWrP4","begin":0,"num":10},'
        '"module":"musichall.song_list_server"}}')
url = ('https://u.y.qq.com/cgi-bin/musicu.fcg?-=getUCGI24849744963436082'
       '&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8'
       '&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data='
       + parse.quote(data))
response = requests.get(url)
# The endpoint returns song records, one dict per song.
songs = response.json()["singerSongList"]["data"]["songList"]
for song in songs:
    print(song)
输出:
{'songInfo': {'id': 237773700, 'type': 0, 'mid': '001qvvgF38HVc4', 'name': '说好不哭(with 五月天阿信)', 'title': '说好不哭(with 五月天阿信)', 'subtitle': '', 'singer': [{'id': 4558, 'mid': '0025NhlN2yWrP4', 'name': '周杰伦', 'title': '周杰伦', 'type': 0, 'uin': 0}], 'album': {'id': 7876962, 'mid': '002gBTVk4JEE2T', 'name': '说好不哭(with 五月天阿信)', 'title': '说好不哭(with 五月天阿信)', 'subtitle': '', 'time_public': '2019-09-16', 'pmid': '002gBTVk4JEE2T_2'}, 'mv': {'id': 1568937, 'vid': 't0032kwa29w', 'name': '', 'title': '', 'vt': 0}, 'interval': 222, 'isonly': 1, 'language': 0, 'genre': 1, 'index_cd': 0, 'index_album': 1, 'time_public': '2019-09-16', 'status': 0, 'fnote': 4009, 'file': {'media_mid': '001qvvgF38HVc4', 'size_24aac': 0, 'size_48aac': 1346650, 'size_96aac': 2711624, 'size_192ogg': 4800957, 'size_192aac': 5352990, 'size_128mp3': 3558702, 'size_320mp3': 8896459, 'size_ape': 0, 'size_flac': 43879570, 'size_dts': 0, 'size_try': 960887, 'try_begin': 75544, 'try_end': 99601, 'url': '', 'size_hires': 0, 'hires_sample': 0, 'hires_bitdepth': 0, 'b_30s': 0, 'e_30s': 60000, 'size_96ogg': 2445475}, 'pay': {'pay_month': 1, 'price_track': 200, 'price_album': 300, 'pay_play': 1, 'pay_down': 1, 'pay_status': 0, 'time_free': 0}, 'action': {'switch': 17405185, 'msgid': 13, 'alert': 2, 'icons': 12861308, 'msgshare': 0, 'msgfav': 0, 'msgdown': 0, 'msgpay': 6}, 'ksong': {'id': 6222280, 'mid': '0014Y4JW2xOFyY'}, 'volume': {'gain': -6.165, 'peak': 0.926, 'lra': 12.839}, 'label': '0', 'url': '', 'bpm': 75, 'version': 0, 'trace': '', 'data_type': 0, 'modify_stamp': 0, 'pingpong': '', 'aid': 0, 'ppurl': '', 'tid': 0, 'ov': 0, 'sa': 0, 'es': ''}}
按照这里的参数设置,我们只能获取这几首歌。注意到参数里面的begin和num,我们试着把num改为820(total数)试一下:
输出:
过于多 略
返回的歌曲数显然不够820首,那么我们编写一个循环,统计一次请求最多能返回多少首:
from urllib import parse
import requests
# Ask for 820 songs starting at offset 0 and count how many records the
# server actually returns in one response.
data = ('{"comm":{"ct":24,"cv":0},"singerSongList":{"method":"GetSingerSongList",'
        '"param":{"order":1,"singerMid":"0025NhlN2yWrP4","begin":0,"num":820},'
        '"module":"musichall.song_list_server"}}')
url = ('https://u.y.qq.com/cgi-bin/musicu.fcg?-=getUCGI24849744963436082'
       '&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8'
       '&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data='
       + parse.quote(data))
response = requests.get(url)
songs = response.json()["singerSongList"]["data"]["songList"]
for song in songs:
    print(song)
# Number of records returned for this single request.
print(len(songs))
输出:100
这个时候我们不妨试着把begin改为100,再试一下
from urllib import parse
import requests
# Same probe as before but starting at offset 100, to check that ``begin``
# is the start offset of the returned window.
data = ('{"comm":{"ct":24,"cv":0},"singerSongList":{"method":"GetSingerSongList",'
        '"param":{"order":1,"singerMid":"0025NhlN2yWrP4","begin":100,"num":820},'
        '"module":"musichall.song_list_server"}}')
url = ('https://u.y.qq.com/cgi-bin/musicu.fcg?-=getUCGI24849744963436082'
       '&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8'
       '&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data='
       + parse.quote(data))
response = requests.get(url)
songs = response.json()["singerSongList"]["data"]["songList"]
for song in songs:
    print(song)
# Number of records returned for this single request.
print(len(songs))
输出:
过于多 略
这个时候我们就知道了,
begin是从哪里开始获取,而num是你要的数量,且这个数量最多能返回80个
我们只要写一个迭代迭代begin就能获取全部歌曲了,可在这之前,我们还需要获取一个total总数
8. 封装函数
def get_singer_data(mid,singer_name,singer_country,singer_pic):
    """Fetch every song of one singer and append each record via ``write_txt``.

    Pages through the singer-song-list endpoint, advancing ``begin`` by the
    number of songs actually returned, so the loop does not depend on the
    server's per-request cap and also covers singers with only a few songs.

    Args:
        mid: the singer's ``singer_mid`` identifier.
        singer_name: singer name copied into every song record.
        singer_country: singer country copied into every song record.
        singer_pic: singer picture URL copied into every song record.
    """
    import json  # local import keeps this snippet self-contained

    page_size = 100  # songs requested per call; the server may return fewer
    base_url = ('https://u.y.qq.com/cgi-bin/musicu.fcg?-=getUCGI24849744963436082'
                '&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8'
                '&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data=')
    payload = ('{"comm":{"ct":24,"cv":0},"singerSongList":{"method":"GetSingerSongList",'
               '"param":{"order":1,"singerMid":"%s","begin":%s,"num":%s},'
               '"module":"musichall.song_list_server"}}')

    begin = 0
    total_num = None  # learned from the first response
    while total_num is None or begin < total_num:
        url = base_url + parse.quote(payload % (mid, begin, page_size))
        data = requests.get(url, headers=headers).json()["singerSongList"]["data"]
        if total_num is None:
            total_num = data["totalNum"]
            print("{}有{}首歌".format(singer_name, total_num))
        songs = data["songList"]
        if not songs:
            # Guard against an endless loop if the server stops returning rows.
            break
        for song in songs:
            # Song fields live under the 'songInfo' key of each list entry.
            info = song["songInfo"]
            inner_song = {
                "song_name": info["name"],
                "album_name": info["album"]["name"],
                "singer_name": singer_name,
                "country": singer_country,
                "singer_pic": singer_pic,
            }
            print(inner_song)
            # write_txt concatenates its argument with "\n", so pass text.
            write_txt(json.dumps(inner_song, ensure_ascii=False))
        begin += len(songs)
9. 完结第一部分
这个时候我们可以爬取全部的歌曲信息了,我们可以写入txt文件里
import requests
import re,json,csv
from urllib import parse
import math
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
# Shared HTTP request headers (browser User-Agent, Referer, DNT) passed to
# the requests calls in this script.
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36',
'Referer': 'https://y.qq.com/',
'DNT':'1'}
def get_singer_data(mid,singer_name,singer_country,singer_pic):
    """Fetch every song of one singer, print it and append it via ``write_txt``.

    Pages through the singer-song-list endpoint, advancing ``begin`` by the
    number of songs actually returned, so the loop does not depend on the
    server's per-request cap and also covers singers with only a few songs.

    Args:
        mid: the singer's ``singer_mid`` identifier.
        singer_name: singer name copied into every song record.
        singer_country: singer country copied into every song record.
        singer_pic: singer picture URL copied into every song record.
    """
    import json  # local import keeps this function self-contained

    page_size = 100  # songs requested per call; the server may return fewer
    base_url = ('https://u.y.qq.com/cgi-bin/musicu.fcg?-=getUCGI24849744963436082'
                '&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8'
                '&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data=')
    payload = ('{"comm":{"ct":24,"cv":0},"singerSongList":{"method":"GetSingerSongList",'
               '"param":{"order":1,"singerMid":"%s","begin":%s,"num":%s},'
               '"module":"musichall.song_list_server"}}')

    begin = 0
    total_num = None  # learned from the first response
    while total_num is None or begin < total_num:
        url = base_url + parse.quote(payload % (mid, begin, page_size))
        data = requests.get(url, headers=headers).json()["singerSongList"]["data"]
        if total_num is None:
            total_num = data["totalNum"]
        songs = data["songList"]
        if not songs:
            # Guard against an endless loop if the server stops returning rows.
            break
        for song in songs:
            # Song fields live under the 'songInfo' key of each list entry.
            info = song['songInfo']
            inner_song = {
                "song_name": info["name"],
                "album_name": info["album"]["name"],
                "singer_name": singer_name,
                "country": singer_country,
                "singer_pic": singer_pic,
            }
            print(inner_song)
            # write_txt concatenates its argument with "\n", so pass text.
            write_txt(json.dumps(inner_song, ensure_ascii=False))
        begin += len(songs)
def write_txt(row):
    """Append one record to ``singer.txt`` as a single line.

    Args:
        row: a string (written unchanged) or any JSON-serialisable object
            such as a song dict, which is written as one JSON line.
    """
    import json  # local import so the function works on its own

    # Callers pass dicts; ``dict + "\n"`` would raise TypeError.
    line = row if isinstance(row, str) else json.dumps(row, ensure_ascii=False)
    with open("singer.txt", "a+", encoding="utf8") as f:
        f.write(line + "\n")
def get_singer_mid(i):
    """Crawl every singer listed under letter partition ``i``.

    Requests the singer-list endpoint once to read the partition's ``total``
    singer count, then requests each page (80 singers per page) and submits
    one ``get_singer_data`` task per singer to a thread pool.

    Args:
        i: partition index sent as the ``index`` request parameter (1-27).
    """
    page_size = 80  # singers per page; ``sin`` advances by this amount
    base_url = ('https://u.y.qq.com/cgi-bin/musicu.fcg?-=getUCGI24849744963436082'
                '&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8'
                '&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data=')
    payload = ('{"comm":{"ct":24,"cv":0},"singerList":{"module":"Music.SingerListServer",'
               '"method":"get_singer_list","param":{"area":-100,"sex":-100,"genre":-100,'
               '"index":%s,"sin":%s,"cur_page":%s}}}')

    print("正在访问分区{}".format(i))
    # First request only serves to learn how many singers the partition holds.
    response = requests.get(base_url + parse.quote(payload % (i, 0, 0)), headers=headers)
    total = response.json()["singerList"]["data"]["total"]
    # Round up so the final, partially filled page is not dropped.
    pages = int(math.ceil(total / page_size))
    print("当前分区{},一共{}个歌手,一共{}页".format(i, total, pages))

    # max_workers must be >= 1; the context manager waits for all submitted
    # singer tasks before this function returns.
    with ThreadPoolExecutor(max_workers=max(1, pages)) as thredpool:
        for page in range(1, pages + 1):
            sin = (page - 1) * page_size  # page 1 -> sin 0, page 2 -> sin 80, ...
            url = base_url + parse.quote(payload % (i, sin, page))
            response = requests.get(url, headers=headers)
            sings = response.json()["singerList"]["data"]["singerlist"]
            for sing in sings:
                # singer_mid identifies the singer's detail page / song list.
                thredpool.submit(
                    get_singer_data,
                    mid=sing["singer_mid"],
                    singer_name=sing["singer_name"],
                    singer_pic=sing["singer_pic"],
                    singer_country=sing["country"],
                )
def myProcess():
    """Dispatch the 27 letter partitions (indexes 1-27) to a pool of 27 processes.

    Each partition index is handed to ``get_singer_mid`` as its own task.
    """
    partition_indexes = range(1, 28)
    with ProcessPoolExecutor(max_workers=27) as pool:
        for index in partition_indexes:
            pool.submit(get_singer_mid, index)


if __name__ == '__main__':
    myProcess()
10音乐下载
对于每个qq音乐的音乐界面,可以看他的url还是我们的mid码
url:https://y.qq.com/n/yqq/song/000gnHiB3wKUKz.html
播放网址没有参数
那一定是使用了接口
我们在播放的时候查看media,发现有7个音乐请求,但只有一个有用(最大的那个)
四个重要参数,多看几个网址,发现vkey是变化的,其他不变也可以
思路:获取这个加密的vkey:
通过在all里寻找,发现了含有vkey的:
Vkey是通过api获取的,我们伪造这个请求即可。通过观察请求中的参数,我们发现referer确实是指向player的
也就是说,player是 通过发送这个请求获取音乐的
我们在测试文件里进行测试,我们先只变化songmid观察可否获取数据
import requests
from urllib import parse
# Probe the vkey endpoint for one song; only ``songmid`` varies in the payload.
data = ('{"req":{"module":"CDN.SrfCdnDispatchServer",'
        '"method":"GetCdnDispatch","param":{"guid":"6522780672","calltype":0,"userip":""}},'
        '"req_0":{"module":"vkey.GetVkeyServer","method":"CgiGetVkey","param":{"guid":"6522780672",'
        '"songmid":["%s"],"songtype":[0],"uin":"0","loginflag":1,'
        '"platform":"20"}},"comm":{"uin":0,"format":"json","ct":24,"cv":0}}') % '000gnHiB3wKUKz'
# Build the URL from ``data`` instead of keeping a second, hand-encoded copy
# of the same payload in the URL string.
url = ("https://u.y.qq.com/cgi-bin/musicu.fcg?-=getplaysongvkey2091814222203221"
       "&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8"
       "&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data="
       + parse.quote(data))
response = requests.get(url).json()
print(response)
输出:
{'code': 0, 'ts': 1583228007813, 'req': {'code': 0, 'data': {'expiration': 86400, 'freeflowsip': ['http://221.180.248.51/amobile.music.tc.qq.com/', 'http://221.180.248.18/amobile.music.tc.qq.com/', 'http://221.180.248.19/amobile.music.tc.qq.com/'], 'keepalivefile': 'C400004TsFuW2mZbRR.m4a?guid=6522780672&vkey=61C26CF2D6C8D2E93DCBD8A922B26C7D013F2C1907191FC6130EFF23279B65FA5D1A2A48A27D1F3C734407BC1031DB839BCABD7C33DB5D4B&uin=0&fromtag=3', 'msg': 'ok', 'retcode': 0, 'servercheck': '9b9cc417387784f41c3456cac2ec34fc', 'sip': ['http://ws.stream.qqmusic.qq.com/', 'http://isure.stream.qqmusic.qq.com/', 'http://221.180.248.51/amobile.music.tc.qq.com/', 'http://221.180.248.18/amobile.music.tc.qq.com/', 'http://221.180.248.19/amobile.music.tc.qq.com/'], 'testfile2g': 'C400003mAan70zUy5O.m4a?guid=6522780672&vkey=7FBDFC3476CEFF97804AF5224BA9FB233CD7D3AC4297A342458BABFE487E026416A6DED49D19252931D52455DAEFE8FF7783DEF2709593EB&uin=0&fromtag=3', 'testfilewifi': 'C400003mAan70zUy5O.m4a?guid=6522780672&vkey=7FBDFC3476CEFF97804AF5224BA9FB233CD7D3AC4297A342458BABFE487E026416A6DED49D19252931D52455DAEFE8FF7783DEF2709593EB&uin=0&fromtag=3', 'uin': '', 'userip': '112.39.200.99', 'vkey': 'D75994FCEA2DAD288A599F5C4493CBD67D27806642FCCF007F549D5D224E28A9A224178A4B48788AA9793BCD838B366463E5838DCF342F42'}}, 'req_0': {'code': 0, 'data': {'expiration': 80400, 'login_key': '', 'midurlinfo': [{'common_downfromtag': 0, 'errtype': '', 'filename': 'C400000gnHiB3wKUKz.m4a', 'flowfromtag': '', 'flowurl': '', 'hisbuy': 0, 'hisdown': 0, 'isbuy': 0, 'isonly': 0, 'onecan': 0, 'opi128kurl': '', 'opi192koggurl': '', 'opi192kurl': '', 'opi30surl': '', 'opi48kurl': '', 'opi96kurl': '', 'opiflackurl': '', 'p2pfromtag': 0, 'pdl': 0, 'pneed': 0, 'pneedbuy': 0, 'premain': 0, 'purl': 'C400000gnHiB3wKUKz.m4a?guid=6522780672&vkey=6A483FECC8E6D0C896BD8027431A3686777B0B053AD6956EC04B27F6D39B047B59D55797EE0041ABE6ADA084C966A8BA4C5B247AD554B321&uin=0&fromtag=66', 'qmdlfromtag': 0, 'result': 0, 'songmid': 
'000gnHiB3wKUKz', 'tips': '', 'uiAlert': 0, 'vip_downfromtag': 0, 'vkey': '6A483FECC8E6D0C896BD8027431A3686777B0B053AD6956EC04B27F6D39B047B59D55797EE0041ABE6ADA084C966A8BA4C5B247AD554B321', 'wififromtag': '', 'wifiurl': ''}], 'msg': '112.39.200.99', 'retcode': 0, 'servercheck': '9b9cc417387784f41c3456cac2ec34fc', 'sip': ['http://ws.stream.qqmusic.qq.com/', 'http://isure.stream.qqmusic.qq.com/'], 'testfile2g': 'C400003mAan70zUy5O.m4a?guid=6522780672&vkey=100E86968A4A1EC05E7F22842DF43ED5F93968F51578793E4BFB35016794D10F99FEE210F9B604D19EDAA1F2FD874ABC9CC19C0D29F9C784&uin=&fromtag=3', 'testfilewifi': 'C400003mAan70zUy5O.m4a?guid=6522780672&vkey=100E86968A4A1EC05E7F22842DF43ED5F93968F51578793E4BFB35016794D10F99FEE210F9B604D19EDAA1F2FD874ABC9CC19C0D29F9C784&uin=&fromtag=3', 'thirdip': ['', ''], 'uin': '', 'verify_type': 0}}}
这里返回成功了,但这里面有很多vkey
Player请求了5次成功,说明这里边的接口有许多无效,
我们也需要一个一个试探
这里只有midurlinfo里边的purl是可用的
接下来我们只需要选择下载源就可以了
下载源的选择很重要,否则会有很多垃圾文件产生(qq音乐会经常更换,要自己调整)
下载文件的时候可以使用流下载(stream=True配合iter_content分块写入),减少内存压力
def download(songmid):
    """Download one song and save it as ``music<songmid>.mp3``.

    Requests the vkey endpoint for the song, reads the signed play path
    (``purl``) from ``req_0.data.midurlinfo[0]`` and streams the audio from
    the download host to disk in 1 KiB chunks.

    Args:
        songmid: the song's ``mid`` identifier.
    """
    data = ('{"req":{"module":"CDN.SrfCdnDispatchServer",'
            '"method":"GetCdnDispatch","param":{"guid":"6522780672","calltype":0,"userip":""}},'
            '"req_0":{"module":"vkey.GetVkeyServer","method":"CgiGetVkey","param":{"guid":"6522780672",'
            '"songmid":["%s"],"songtype":[0],"uin":"0","loginflag":1,'
            '"platform":"20"}},"comm":{"uin":0,"format":"json","ct":24,"cv":0}}') % songmid
    url = ('https://u.y.qq.com/cgi-bin/musicu.fcg?-=getplaysongvkey2091814222203221'
           '&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8'
           '&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data={}'
           ).format(parse.quote(data))
    with requests.session() as session:
        response = session.get(url, headers=headers).json()
        # purl is already "<file>.m4a?guid=...&vkey=...&fromtag=..."; it is
        # appended to the host as-is, not spliced in as a bare vkey.
        purl = response['req_0']["data"]["midurlinfo"][0]["purl"]
        if not purl:
            # NOTE(review): presumably returned empty for songs that cannot be
            # played anonymously - confirm. Skipping avoids junk files.
            return
        music_url = 'http://dl.stream.qqmusic.qq.com/' + purl
        response = session.get(url=music_url, headers=headers, stream=True)
        if response.status_code != 200:
            return
        # NOTE(review): the payload is an .m4a stream; the .mp3 file name is
        # kept unchanged for compatibility with existing output.
        with open("music{}.mp3".format(songmid), "wb") as f:
            for chunk in response.iter_content(1024):
                f.write(chunk)
全部代码:
import requests
import re,json,csv
from urllib import parse
import math
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
# Shared HTTP request headers (browser User-Agent, Referer, DNT) passed to
# the requests calls in this script.
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36',
'Referer': 'https://y.qq.com/',
'DNT':'1'}
def get_singer_data(mid,singer_name,singer_country,singer_pic):
    """Fetch every song of one singer and download each one.

    Pages through the singer-song-list endpoint, advancing ``begin`` by the
    number of songs actually returned, so the loop does not depend on the
    server's per-request cap and also covers singers with only a few songs.
    Each song's ``mid`` is passed to ``download``.

    Args:
        mid: the singer's ``singer_mid`` identifier.
        singer_name: singer name copied into every song record.
        singer_country: singer country copied into every song record.
        singer_pic: singer picture URL copied into every song record.
    """
    page_size = 100  # songs requested per call; the server may return fewer
    base_url = ('https://u.y.qq.com/cgi-bin/musicu.fcg?-=getUCGI24849744963436082'
                '&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8'
                '&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data=')
    payload = ('{"comm":{"ct":24,"cv":0},"singerSongList":{"method":"GetSingerSongList",'
               '"param":{"order":1,"singerMid":"%s","begin":%s,"num":%s},'
               '"module":"musichall.song_list_server"}}')

    begin = 0
    total_num = None  # learned from the first response
    while total_num is None or begin < total_num:
        url = base_url + parse.quote(payload % (mid, begin, page_size))
        data = requests.get(url, headers=headers).json()["singerSongList"]["data"]
        if total_num is None:
            total_num = data["totalNum"]
        songs = data["songList"]
        if not songs:
            # Guard against an endless loop if the server stops returning rows.
            break
        for song in songs:
            # Song fields live under the 'songInfo' key of each list entry.
            info = song['songInfo']
            inner_song = {
                "song_name": info["name"],
                "album_name": info["album"]["name"],
                "singer_name": singer_name,
                "country": singer_country,
                "singer_pic": singer_pic,
                'song_mid': info['mid'],
            }
            download(inner_song["song_mid"])
        begin += len(songs)
def download(songmid):
    """Download one song and save it as ``music<songmid>.mp3``.

    Requests the vkey endpoint for the song, reads the signed play path
    (``purl``) from ``req_0.data.midurlinfo[0]`` and streams the audio from
    the download host to disk in 1 KiB chunks.

    Args:
        songmid: the song's ``mid`` identifier.
    """
    data = ('{"req":{"module":"CDN.SrfCdnDispatchServer",'
            '"method":"GetCdnDispatch","param":{"guid":"6522780672","calltype":0,"userip":""}},'
            '"req_0":{"module":"vkey.GetVkeyServer","method":"CgiGetVkey","param":{"guid":"6522780672",'
            '"songmid":["%s"],"songtype":[0],"uin":"0","loginflag":1,'
            '"platform":"20"}},"comm":{"uin":0,"format":"json","ct":24,"cv":0}}') % songmid
    # The JSON payload is percent-encoded before being placed in the query.
    url = ("https://u.y.qq.com/cgi-bin/musicu.fcg?-=getplaysongvkey2091814222203221"
           "&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8"
           "&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data={}"
           ).format(parse.quote(data))
    with requests.session() as session:
        response = session.get(url, headers=headers).json()
        # purl is already "<file>.m4a?guid=...&vkey=...&fromtag=..."; it is
        # appended to the host as-is, not spliced in as a bare vkey.
        purl = response['req_0']["data"]["midurlinfo"][0]["purl"]
        if not purl:
            # NOTE(review): presumably returned empty for songs that cannot be
            # played anonymously - confirm. Skipping avoids junk files.
            return
        music_url = 'http://dl.stream.qqmusic.qq.com/' + purl
        response = session.get(url=music_url, headers=headers, stream=True)
        if response.status_code != 200:
            return
        # NOTE(review): the payload is an .m4a stream; the .mp3 file name is
        # kept unchanged for compatibility with existing output.
        with open("music{}.mp3".format(songmid), "wb") as f:
            for chunk in response.iter_content(1024):
                f.write(chunk)
def write_txt(row):
    """Append one record to ``singer.txt`` as a single line.

    Args:
        row: a string (written unchanged) or any JSON-serialisable object
            such as a song dict, which is written as one JSON line.
    """
    import json  # local import so the function works on its own

    # Callers pass dicts; ``dict + "\n"`` would raise TypeError.
    line = row if isinstance(row, str) else json.dumps(row, ensure_ascii=False)
    with open("singer.txt", "a+", encoding="utf8") as f:
        f.write(line + "\n")
def get_singer_mid(i):
    """Crawl every singer listed under letter partition ``i``.

    Requests the singer-list endpoint once to read the partition's ``total``
    singer count, then requests each page (80 singers per page) and submits
    one ``get_singer_data`` task per singer to a 5-thread pool.

    Args:
        i: partition index sent as the ``index`` request parameter (1-27).
    """
    page_size = 80  # singers per page; ``sin`` advances by this amount
    base_url = ('https://u.y.qq.com/cgi-bin/musicu.fcg?-=getUCGI24849744963436082'
                '&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8'
                '&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data=')
    payload = ('{"comm":{"ct":24,"cv":0},"singerList":{"module":"Music.SingerListServer",'
               '"method":"get_singer_list","param":{"area":-100,"sex":-100,"genre":-100,'
               '"index":%s,"sin":%s,"cur_page":%s}}}')

    # First request only serves to learn how many singers the partition holds.
    response = requests.get(base_url + parse.quote(payload % (i, 0, 0)), headers=headers)
    total = response.json()["singerList"]["data"]["total"]
    # Round up so the final, partially filled page is not dropped.
    pages = int(math.ceil(total / page_size))

    # The context manager waits for all submitted singer tasks before this
    # function returns.
    with ThreadPoolExecutor(max_workers=5) as thredpool:
        for page in range(1, pages + 1):
            sin = (page - 1) * page_size  # page 1 -> sin 0, page 2 -> sin 80, ...
            url = base_url + parse.quote(payload % (i, sin, page))
            response = requests.get(url, headers=headers)
            sings = response.json()["singerList"]["data"]["singerlist"]
            for sing in sings:
                # singer_mid identifies the singer's detail page / song list.
                thredpool.submit(
                    get_singer_data,
                    mid=sing["singer_mid"],
                    singer_name=sing["singer_name"],
                    singer_pic=sing["singer_pic"],
                    singer_country=sing["country"],
                )
def myProcess():
    """Dispatch the 27 letter partitions (indexes 1-27) to a pool of 5 processes.

    Each partition index is handed to ``get_singer_mid`` as its own task.
    """
    partition_indexes = range(1, 28)
    with ProcessPoolExecutor(max_workers=5) as pool:
        for index in partition_indexes:
            pool.submit(get_singer_mid, index)


if __name__ == '__main__':
    myProcess()
特别的,测试代码:用于测试vkey
import requests
import re,json,csv
from urllib import parse
import math
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
# Stand-alone test: fetch the signed play path (purl) for one song and stream
# the audio to disk.
headers={'accept':'*/*',
'accept-encoding':'gzip, deflate, br',
'accept-language':'zh-CN,zh;q=0.9',
'cache-control':'no-cache',
'content-type':'application/x-www-form-urlencoded; charset=UTF-8',
'referer':'https://y.qq.com/portal/player.html',
'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6726.400 QQBrowser/10.2.2265.400'
}
songmid = '000gnHiB3wKUKz'
data = ('{"req":{"module":"CDN.SrfCdnDispatchServer",'
        '"method":"GetCdnDispatch","param":{"guid":"6522780672","calltype":0,"userip":""}},'
        '"req_0":{"module":"vkey.GetVkeyServer","method":"CgiGetVkey","param":{"guid":"6522780672",'
        '"songmid":["%s"],"songtype":[0],"uin":"0","loginflag":1,'
        '"platform":"20"}},"comm":{"uin":0,"format":"json","ct":24,"cv":0}}') % songmid
# The JSON payload is percent-encoded before being placed in the query.
url = ("https://u.y.qq.com/cgi-bin/musicu.fcg?-=getplaysongvkey2091814222203221"
       "&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8"
       "&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data={}"
       ).format(parse.quote(data))
response = requests.get(url, headers=headers).json()
# purl is already "<file>.m4a?guid=...&vkey=...&fromtag=..."; it is appended
# to the host as-is, not spliced in as a bare vkey.
purl = response['req_0']["data"]["midurlinfo"][0]["purl"]
if purl:
    music_url = 'http://dl.stream.qqmusic.qq.com/' + purl
    response = requests.get(url=music_url, headers=headers, stream=True)
    with open("music{}.mp3".format(songmid), "wb") as f:
        for chunk in response.iter_content(1024):
            f.write(chunk)
else:
    print("no purl returned for", songmid)
或者:
import requests
import re,json,csv
from urllib import parse
import math
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
# Stand-alone test: download from a specific CDN host using a vkey captured
# by hand from the browser.
#
# An alternative header set copied from a logged-in browser session
# (Connection / Cookie / Host: ws.stream.qqmusic.qq.com / User-Agent) used to
# be kept here inside a string literal. The cookie value has been removed:
# session cookies are credentials and must not be stored in source. To try
# that variant, supply your own value, e.g.
#     headers = {'Connection': 'keep-alive', 'Cookie': '<your cookie>',
#                'Host': 'ws.stream.qqmusic.qq.com', 'User-Agent': '...'}
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36',
'Referer': 'https://y.qq.com/',
'DNT':'1'}
# Here the value is the media file stem ("C400" + id), used directly in the
# download URL below.
songmid = 'C400004OSykC0Sp0Iq'
params = songmid
data = ('{"req":{"module":"CDN.SrfCdnDispatchServer",'
        '"method":"GetCdnDispatch","param":{"guid":"6522780672","calltype":0,"userip":""}},'
        '"req_0":{"module":"vkey.GetVkeyServer","method":"CgiGetVkey","param":{"guid":"6522780672",'
        '"songmid":["%s"],"songtype":[0],"uin":"0","loginflag":1,'
        '"platform":"20"}},"comm":{"uin":0,"format":"json","ct":24,"cv":0}}') % songmid
# The JSON payload is percent-encoded before being placed in the query.
url = ("https://u.y.qq.com/cgi-bin/musicu.fcg?-=getplaysongvkey2091814222203221"
       "&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8"
       "&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data={}"
       ).format(parse.quote(data))
# The API response is fetched but not used below; the vkey is hard-coded.
response = requests.get(url, headers=headers).json()
vkey = "7DAC478FBD0CD079DB8067150B5FB9F7307E73CEE52885736D447E485CEA2CAFD3CA69F984F4AEACA1E30EF38836DC2AA5752CCA67205953"
music_url = 'http://221.180.248.19/amobile.music.tc.qq.com/'+params+'.m4a?guid=6522780672&vkey='+vkey+'&uin=247&fromtag=66'
print(music_url)
response = requests.get(url=music_url, headers=headers, stream=True)
with open("music{}.mp3".format(params), "wb") as f:
    for chunk in response.iter_content(1024):
        f.write(chunk)
11存储为数据库文件
from sqlalchemy import *
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
# Engine for the MySQL database 'test1' on localhost:3306 through the PyMySQL driver.
# NOTE(review): user name and password are hard-coded in the URL - consider
# loading them from configuration or the environment.
engine = create_engine('mysql+pymysql://root:1234@localhost:3306/test1?charset=utf8')
# Session factory bound to the engine above.
DBsession = sessionmaker(bind=engine)
# Module-level session created from the factory.
SQLsession = DBsession()
# Declarative base class that the ORM models below inherit from.
Base = declarative_base()
# ORM model mapped to the database table named 'song'.
class song(Base):
    __tablename__='song'
    # Auto-incrementing integer primary key.
    song_id=Column(Integer,primary_key=True,autoincrement=True)
    # Song title, up to 64 characters.
    song_name=Column(String(64))
    # Album name, up to 64 characters. The column is spelled 'song_ablum' (sic);
    # renaming it would change the table schema.
    song_ablum=Column(String(64))
    # Song mid identifier, up to 50 characters.
    song_mid=Column(String(50))
    # Singer name, up to 50 characters.
    song_singer=Column(String(50))
# Create the tables of all models registered on Base using the engine.
Base.metadata.create_all(engine)
def _first_present(record, *keys):
    """Return ``record[key]`` for the first of ``keys`` found in ``record``."""
    for key in keys:
        if key in record:
            return record[key]
    raise KeyError(keys[0])


def insert_data(songs):
    """Insert one song record into the ``song`` table and commit it.

    Args:
        songs: mapping with the keys ``songname``, ``albumname``, ``songmid``
            and ``song_singer``. The names used by the crawler's song dict
            (``song_name``, ``album_name``, ``song_mid``, ``singer_name``)
            are accepted as fallbacks.

    Raises:
        KeyError: if a required field is missing under both names.
    """
    # Reuse the module-level session factory instead of building a new engine
    # for every inserted row.
    session = DBsession()
    try:
        session.add(song(
            song_name=_first_present(songs, 'songname', 'song_name'),
            song_ablum=_first_present(songs, 'albumname', 'album_name'),
            song_mid=_first_present(songs, 'songmid', 'song_mid'),
            song_singer=_first_present(songs, 'song_singer', 'singer_name'),
        ))
        session.commit()
    except Exception:
        # Leave the connection clean before propagating the error.
        session.rollback()
        raise
    finally:
        session.close()