# 爬取“漫画岛”《鬼抬轿》
# 导入第三方库
import requests
from bs4 import BeautifulSoup
# HTTP request headers carrying a desktop Chrome User-Agent string.
# The value is assembled from adjacent string literals so that source
# indentation can never leak into the header.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/70.0.3538.102 '
        'Safari/537.36'
    ),
}
def open_url(url, timeout=10):
    """Download ``url`` and return the response body as text.

    The request is sent with the module-level ``headers`` dict.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The response body decoded to ``str`` (``Response.text``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    r = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page to
    # the HTML parser, which would silently find no images.
    r.raise_for_status()
    return r.text
def save_img(contents, timeout=10):
    """Extract the page images from the chapter HTML and save them to disk.

    Every ``<li style="margin-bottom:0">`` element in ``contents`` is
    inspected; the ``src`` of its first ``<img>`` is downloaded and written
    to the current working directory as
    ``《鬼抬轿》第一话第<N>页.jpg``, where ``N`` counts the saved pages
    starting from 1.

    Args:
        contents: HTML source of the chapter page.
        timeout: Seconds to wait for each image request before giving up.

    Raises:
        requests.HTTPError: If an image request returns a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
        OSError: If an image file cannot be written.
    """
    soup = BeautifulSoup(contents, 'lxml')
    page_items = soup.find_all('li', style="margin-bottom:0")
    page_num = 1
    for item in page_items:
        img_tag = item.img
        src = img_tag.get('src') if img_tag is not None else None
        if not src:
            # A matching <li> without an <img src=...> is not a comic page;
            # skip it rather than crash on a missing tag or attribute.
            continue
        # Send the same headers as the page request and bound the wait.
        resp = requests.get(src, headers=headers, timeout=timeout)
        # Do not write an HTTP error body to disk as if it were a JPEG.
        resp.raise_for_status()
        # 'wb': the image payload is raw bytes.
        with open('《鬼抬轿》第一话第{}页'.format(page_num) + '.jpg', 'wb') as f:
            f.write(resp.content)
        print('漫画《鬼抬轿》第一话第{}页已经下载完成!'.format(page_num))
        page_num += 1
def main():
    """Fetch the hard-coded chapter page and save the images found on it."""
    comic_page = 'http://www.manhuadao.cn/Comic/ComicView?comicid=58ddaeb027a7c1392c224ce5'
    save_img(open_url(comic_page))
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()