Weather Forecast Fetching Module
The code has been refactored so that each function has a clearer, single responsibility and is easier to integrate on its own. The interface has also been cleaned up, and it is somewhat better than the source code that bloggers keep copying from one another online.
Improvements: the weather-fetching logic is split into several functions, and saving the CSV file multiple times no longer rewrites the header row.
Weather Fetching Module
1.1 Dependency Table

Package | Description | Installation |
--- | --- | --- |
os (standard library) | File existence checks | built in, no install needed |
csv (standard library) | CSV reading and writing | built in, no install needed |
json (standard library) | JSON parsing | built in, no install needed |
requests (third party) | HTTP client used to fetch the pages | pip install requests |
bs4.BeautifulSoup (third party) | HTML parsing object | pip install beautifulsoup4 |
bs4.element.Tag (third party) | HTML tag object | installed with beautifulsoup4 |
1.2 Global Variable Table

Variable | Description | Initial Value |
--- | --- | --- |
(none) | | |
1.3 Functions

Function | Description | Public interface? |
--- | --- | --- |
get_html_text | Fetch page content (no proxy, no custom headers) | No |
get_today_weather | Get today's hourly weather | No |
get_1_7_weather | Get this week's (days 1-7) weather | No |
get_today_and_week | Get today's and this week's weather together | No |
get_8_14_weather | Get next week's (days 8-14) weather | No |
write_to_csv | Save data to a CSV file | No |
1.4 Interface Function

Function | Description |
--- | --- |
get_two_weak_weather | Fetch two weeks of weather and save it to CSV files |

Input | Description | Type |
--- | --- | --- |
city_code | City code | str |

Output | Description |
--- | --- |
None | Results are written to weather14.csv and weather1.csv |
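A minimal usage sketch. The city code is the example from the module's own `__main__` block, and the output file names come from the `write_to_csv` calls inside `get_two_weak_weather`; it assumes the listing below is saved as weather.py on the import path.

# Fetch today's hourly data and the 14-day forecast for the given
# city code, writing them to weather1.csv and weather14.csv.
from weather import get_two_weak_weather

get_two_weak_weather("101280701")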
1.5 Code
# weather.py
import os
import csv
import json
import requests
from bs4 import BeautifulSoup
from bs4.element import Tag


def get_html_text(url):
    """Fetch the page at url and return its HTML text."""
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        print("Request succeeded")
        return r.text
    except Exception as e:
        print(e)
        print("Request failed")
        return " "


def get_today_weather(body_tag: Tag):
    """Parse today's 24-hour observation data from the page body."""
    td_wea_list = []  # rows of today's hourly data
    count = 0

    def get_today_json(_tag: Tag):
        # Locate the <script> that carries today's hourly observation data
        weather_div = _tag.find_all('div', {'class': 'left-div'})
        observe24h_data = weather_div[2].find('script').string
        # Turn the script text into JSON (drop the leading "var ... =" and the trailing ";")
        begin_index = observe24h_data.index('=') + 1
        end_index = -2
        observe24h_data = observe24h_data[begin_index: end_index]
        observe24h_json = json.loads(observe24h_data)
        t_json = observe24h_json['od']['od2']
        # Fill in a missing air-quality value for the first entry
        if t_json[0]['od28'] == "":
            t_json[0]['od28'] = t_json[1]['od28']
        return t_json

    today_json = get_today_json(body_tag)
    for i in today_json:
        od_wea_list = []
        if count <= 23:
            od_wea_list.append(i['od21'])  # hour
            od_wea_list.append(i['od22'])  # temperature at this hour
            od_wea_list.append(i['od24'])  # wind direction at this hour
            od_wea_list.append(i['od25'])  # wind force at this hour
            od_wea_list.append(i['od26'])  # precipitation at this hour
            od_wea_list.append(i['od27'])  # relative humidity at this hour
            od_wea_list.append(i['od28'])  # air quality at this hour
            print(od_wea_list)
            td_wea_list.append(od_wea_list)
        count = count + 1
    print(td_wea_list)
    return td_wea_list


def get_1_7_weather(body_tag: Tag):
    """Parse the 7-day forecast from the page body."""
    week_wea_list = list()  # accumulated rows, returned at the end
    # The 7-day data live under body -> div#7d -> ul -> li
    data = body_tag.find('div', {'id': '7d'})
    ul = data.find_all('ul')[0]
    li = ul.find_all('li')
    for day in li:  # one li per day
        od_wea_list = list()  # this day's row
        # Date: keep only the day-of-month before '日'
        date = day.find('h1').string
        date = date[0:date.index('日')]
        od_wea_list.append(date)
        # The li holds three p tags: weather, temperature, wind
        inf = day.find_all('p')
        # First p tag: weather description
        od_wea_list.append(inf[0].string)
        # Second p tag: low and high temperatures
        if inf[1].find('i') is None:
            tem_low = None
        else:
            tem_low = inf[1].find('i').string  # low temperature
        if inf[1].find('span') is None:  # the forecast may omit the high temperature
            tem_high = None
        else:
            tem_high = inf[1].find('span').string  # high temperature
        if tem_low is not None and tem_low[-1] == '℃':
            od_wea_list.append(tem_low[:-1])
        else:
            od_wea_list.append(tem_low)
        if tem_high is not None and tem_high[-1] == '℃':
            od_wea_list.append(tem_high[:-1])
        else:
            od_wea_list.append(tem_high)
        # Third p tag: wind directions (before/after a change) and wind force
        wind = inf[2].find_all('span')  # wind directions
        for j in wind:
            od_wea_list.append(j['title'])
        wind_scale = inf[2].find('i').string  # wind force, e.g. "3-4级"
        index1 = wind_scale.index('级')
        od_wea_list.append(int(wind_scale[index1 - 1:index1]))
        # Append this day's row
        week_wea_list.append(od_wea_list)
    # print(week_wea_list)
    return week_wea_list


def get_today_and_week(html: str):
    """Parse the page and return (today's hourly data, 7-day data)."""
    bs = BeautifulSoup(html, "html.parser")  # build the BeautifulSoup tree
    body = bs.body
    # Today's hourly data
    td_wea_list = get_today_weather(body)
    # This week's (7-day) data
    week_wea_list = get_1_7_weather(body)
    return td_wea_list, week_wea_list


def get_8_14_weather(html):
    """Parse the day 8-14 forecast page."""
    week_wea_list = []
    i = 0  # limits the number of days collected
    bs = BeautifulSoup(html, "html.parser")  # build the BeautifulSoup tree
    body = bs.body
    data = body.find('div', {'id': '15d'})  # the div with id="15d"
    ul = data.find('ul')  # the ul holding the daily entries
    li = ul.find_all('li')  # all li tags, one per day
    for day in li:
        if i < 7:
            od_wea_list = list()
            # Date: the text inside the parentheses, without the trailing '日)'
            date = day.find('span', {'class': 'time'}).string
            date = date[date.index('(') + 1:-2]
            od_wea_list.append(date)
            # Weather description
            weather = day.find('span', {'class': 'wea'}).string
            od_wea_list.append(weather)
            # Temperatures, formatted like "35℃/28℃"
            tem = day.find('span', {'class': 'tem'}).text
            od_wea_list.append(tem[tem.index('/') + 1:-1])  # low temperature
            od_wea_list.append(tem[:tem.index('/') - 1])  # high temperature
            # Wind directions and wind force
            wind = day.find('span', {'class': 'wind'}).string
            if '转' in wind:  # the wind direction changes during the day
                od_wea_list.append(wind[:wind.index('转')])
                od_wea_list.append(wind[wind.index('转') + 1:])
            else:  # no change: same direction before and after
                od_wea_list.append(wind)
                od_wea_list.append(wind)
            wind_scale = day.find('span', {'class': 'wind1'}).string  # wind force, e.g. "3-4级"
            index1 = wind_scale.index('级')
            od_wea_list.append(int(wind_scale[index1 - 1:index1]))
            week_wea_list.append(od_wea_list)
        i = i + 1
    return week_wea_list


def write_to_csv(file_name, data, day=14):
    """Append data to a CSV file, writing the header row only when the file is first created."""
    if not os.path.exists(file_name):
        with open(file_name, 'w', errors='ignore', newline='') as f:
            if day == 14:
                # date, weather, low temp, high temp, wind dir 1, wind dir 2, wind force
                header = ['日期', '天气', '最低气温', '最高气温', '风向1', '风向2', '风级']
            else:
                # hour, temperature, wind direction, wind force, precipitation, humidity, air quality
                header = ['小时', '温度', '风力方向', '风级', '降水量', '相对湿度', '空气质量']
            f_csv = csv.writer(f)
            f_csv.writerow(header)
            f_csv.writerows(data)
    else:
        with open(file_name, 'a', errors='ignore', newline='') as f:
            f_csv = csv.writer(f)
            f_csv.writerows(data)


def get_two_weak_weather(city_code: str):
    """Fetch two weeks of weather for city_code and save it to CSV files."""
    url_head = "http://www.weather.com.cn/weather"
    url_1_7 = "".join([url_head, "/", city_code, ".shtml"])  # 7-day forecast page
    url_8_14 = "".join([url_head, "15d", "/", city_code, ".shtml"])  # day 8-14 forecast page
    html_1_7 = get_html_text(url_1_7)
    data1, data1_7 = get_today_and_week(html_1_7)
    html8_14 = get_html_text(url_8_14)
    data8_14 = get_8_14_weather(html8_14)
    data14 = data1_7 + data8_14
    write_to_csv('weather14.csv', data14, 14)  # save the 14-day forecast
    write_to_csv('weather1.csv', data1, 1)  # save today's hourly data


if __name__ == '__main__':
    get_two_weak_weather("101280701")