天气预报获取模块

基于网络上流传的源码做了代码优化,每个函数的功能更加明确,更适合单独集成。接口也进行了优化,比网络上博主抄来抄去的源码要好一些。

改进点:将获取天气的逻辑拆成了多个函数;多次保存 csv 文件时,不再重复写入标题行。

天气获取模块

1.1 依赖表

包名

含义

安装方式

系统包

os

csv

json

第三方依赖

requests

爬虫模块

bs4.BeautifulSoup

网页解析对象

bs4.element.Tag

网页标签对象

1.2 全局变量表

变量名

含义

初值

1.3 函数

函数名

含义

是否接口

get_html_text

请求网页内容(不使用代理,不设置自定义请求头 headers)


get_today_weather

获取当天天气


get_1_7_weather

获取当周天气


get_today_and_week

获取当周和当天的天气


get_8_14_weather

获取下一周天气


write_to_csv

保存文件模块


1.4 接口函数

函数名

含义

get_two_weak_weather

获取两周天气

入参

含义

类型

city_code

城市代码

字符串

出参

None

1.5 代码
# weather.py
import os
import csv
import json

import requests
from bs4 import BeautifulSoup
from bs4.element import Tag


def get_html_text(url):
    """Fetch *url* and return the decoded page text.

    The request is a plain GET with a 30-second timeout. The response
    encoding is replaced by the one detected from the body before the text
    is read.

    Returns:
        The page text on success. On any exception the error is printed and
        a single-space string ``" "`` is returned instead of raising.
    """
    try:
        response = requests.get(url, timeout=30)
        # Turn HTTP error statuses into exceptions so they reach the handler.
        response.raise_for_status()
        # Decode with the encoding guessed from the content rather than the
        # one derived from the response headers.
        response.encoding = response.apparent_encoding
        print("成功访问")
        page_text = response.text
        return page_text
    except Exception as exc:
        # Best-effort: report the problem and hand back the sentinel value.
        print(exc)
        print("访问错误")
        return " "


def get_today_weather(body_tag: Tag):
    """Extract up to 24 hourly observation rows from the page body.

    The data is read from the ``<script>`` inside the third
    ``div.left-div`` element: everything after the first ``=`` and before
    the last two characters of the script text is parsed as JSON, and the
    list at ``['od']['od2']`` is used.

    Args:
        body_tag: The ``<body>`` tag of the 7-day forecast page.

    Returns:
        A list of rows ``[od21, od22, od24, od25, od26, od27, od28]``
        (time, temperature, wind direction, wind level, precipitation,
        relative humidity, air quality), built from the first 24 entries.
    """
    # Locate the script that carries the observation data.
    left_divs = body_tag.find_all('div', {'class': 'left-div'})
    script_text = left_divs[2].find('script').string

    # Strip the "var ... =" prefix and the two trailing characters so that
    # only the JSON payload remains.
    json_start = script_text.index('=') + 1
    hourly_records = json.loads(script_text[json_start:-2])['od']['od2']

    # The first entry may have no air-quality value; borrow the next one's.
    if hourly_records[0]['od28'] == "":
        hourly_records[0]['od28'] = hourly_records[1]['od28']

    field_keys = ('od21', 'od22', 'od24', 'od25', 'od26', 'od27', 'od28')
    td_wea_list = []
    for position, record in enumerate(hourly_records):
        if position > 23:  # only the first 24 entries are kept
            break
        hour_row = [record[key] for key in field_keys]
        print(hour_row)
        td_wea_list.append(hour_row)
    print(td_wea_list)
    return td_wea_list


def get_1_7_weather(body_tag: Tag):
    """Parse the 7-day forecast list into one row per day.

    The rows are read from ``div#7d`` -> first ``<ul>`` -> each ``<li>``.

    Args:
        body_tag: The ``<body>`` tag of the 7-day forecast page.

    Returns:
        A list of rows
        ``[date, weather, low_temp, high_temp, wind_1, wind_2, wind_level]``.
        ``low_temp`` / ``high_temp`` have a trailing ``℃`` removed and are
        ``None`` when the page does not provide them. ``wind_level`` is an
        int.

    Raises:
        ValueError: If the date text has no ``日``, the wind-level text has
            no ``级``, or no digits directly precede ``级``.
    """
    week_wea_list = []

    # body -> div#7d -> ul -> li (one <li> per day)
    data = body_tag.find('div', {'id': '7d'})
    ul = data.find_all('ul')[0]

    for day in ul.find_all('li'):
        # Date: keep only the part before "日".
        date = day.find('h1').string
        od_wea_list = [date[0:date.index('日')]]

        # Each <li> holds three <p> tags: weather, temperature, wind.
        inf = day.find_all('p')
        od_wea_list.append(inf[0].string)

        # Temperature: <i> holds the low, <span> the high. Either may be
        # missing, so a missing value is stored as None instead of being
        # indexed (which used to raise TypeError).
        for tag_name in ('i', 'span'):
            node = inf[1].find(tag_name)
            text = node.string if node is not None else None
            if text and text[-1] == '℃':
                text = text[:-1]
            od_wea_list.append(text)

        # Wind directions come from the ``title`` of each <span>. When only
        # one direction is present it is repeated so that the row keeps the
        # two direction columns the CSV header expects.
        directions = [span['title'] for span in inf[2].find_all('span')]
        if len(directions) == 1:
            directions.append(directions[0])
        od_wea_list.extend(directions)

        # Wind level: take the whole run of digits directly before the
        # first "级" (e.g. "3-4级" -> 4, "10-11级" -> 11), not just the
        # single character before it.
        wind_scale = inf[2].find('i').string
        level_text = wind_scale[:wind_scale.index('级')]
        digits = level_text[len(level_text.rstrip('0123456789')):]
        od_wea_list.append(int(digits))

        week_wea_list.append(od_wea_list)
    return week_wea_list

    
def get_today_and_week(html: str):
    """Parse the 7-day forecast page into hourly and daily rows.

    Args:
        html: HTML text of the 7-day forecast page.

    Returns:
        A tuple ``(hourly_rows, daily_rows)`` produced by
        ``get_today_weather`` and ``get_1_7_weather`` respectively, both
        fed the ``<body>`` of the parsed document.
    """
    page_body = BeautifulSoup(html, "html.parser").body

    # Hourly observations first, then the 7-day list.
    hourly_rows = get_today_weather(page_body)
    daily_rows = get_1_7_weather(page_body)
    return hourly_rows, daily_rows


def get_8_14_weather(html):
    """Parse the extended-forecast page and return rows for 7 days.

    The rows are read from ``div#15d`` -> first ``<ul>`` -> each ``<li>``.
    Only the first 7 ``<li>`` entries are used.

    Args:
        html: HTML text of the extended (``weather15d``) forecast page.

    Returns:
        A list of at most 7 rows
        ``[date, weather, low_temp, high_temp, wind_1, wind_2, wind_level]``.
        ``wind_level`` is an int; the other values are taken from the page
        text.

    Raises:
        ValueError: If the date, temperature or wind-level text lacks the
            marker character it is split on, or no digits directly precede
            ``级``.
    """
    max_days = 7  # number of days to collect

    bs = BeautifulSoup(html, "html.parser")
    data = bs.body.find('div', {'id': '15d'})
    li = data.find('ul').find_all('li')

    week_wea_list = []
    # Slice instead of counting by hand, so the 7-day limit actually applies.
    for day in li[:max_days]:
        od_wea_list = []

        # Date: keep what sits between the opening bracket and the final
        # two characters of the text.
        date = day.find('span', {'class': 'time'}).string
        od_wea_list.append(date[date.index('(') + 1:-2])

        # Weather description.
        od_wea_list.append(day.find('span', {'class': 'wea'}).string)

        # Temperature text is "<high>?/<low>?", where "?" is one unit
        # character that gets dropped from each side. The low goes first.
        tem = day.find('span', {'class': 'tem'}).text
        slash = tem.index('/')
        od_wea_list.append(tem[slash + 1:-1])  # low
        od_wea_list.append(tem[:slash - 1])  # high

        # Wind direction: "A转B" means it changes from A to B; otherwise
        # the same direction fills both columns.
        wind = day.find('span', {'class': 'wind'}).string
        if '转' in wind:
            turn = wind.index('转')
            od_wea_list.append(wind[:turn])
            od_wea_list.append(wind[turn + 1:])
        else:
            od_wea_list.append(wind)
            od_wea_list.append(wind)

        # Wind level: take the whole run of digits directly before the
        # first "级" (e.g. "3-4级" -> 4, "10-11级" -> 11), not just the
        # single character before it.
        wind_scale = day.find('span', {'class': 'wind1'}).string
        level_text = wind_scale[:wind_scale.index('级')]
        digits = level_text[len(level_text.rstrip('0123456789')):]
        od_wea_list.append(int(digits))

        week_wea_list.append(od_wea_list)
    return week_wea_list


def write_to_csv(file_name, data, day=14):
    """Append rows to a CSV file, writing the header row only once.

    Args:
        file_name: Path of the CSV file; it is created when missing.
        data: Iterable of rows to append.
        day: ``14`` selects the daily-forecast header; any other value
            selects the hourly-observation header.

    The header is written when the file does not exist yet or exists but is
    empty; otherwise only ``data`` is appended.
    """
    if day == 14:
        header = ['日期', '天气', '最低气温', '最高气温', '风向1', '风向2', '风级']
    else:
        header = ['小时', '温度', '风力方向', '风级', '降水量', '相对湿度', '空气质量']

    # An existing zero-byte file still needs its header, not just a new one.
    needs_header = (
        not os.path.exists(file_name) or os.path.getsize(file_name) == 0
    )

    # Append mode creates the file when it is missing, so a single code path
    # serves both the first and the subsequent writes.
    # NOTE(review): the platform default encoding is kept so that files
    # written earlier stay consistent; errors='ignore' drops characters
    # that encoding cannot represent.
    with open(file_name, 'a', errors='ignore', newline='') as f:
        f_csv = csv.writer(f)
        if needs_header:
            f_csv.writerow(header)
        f_csv.writerows(data)


def get_two_weak_weather(city_code: str):
    """Fetch two weeks of forecast for *city_code* and save them as CSV.

    Downloads the 7-day page and the extended-forecast page, then writes:

    * ``weather14.csv`` - the daily rows of both pages, concatenated;
    * ``weather1.csv`` - the hourly rows of the current day.

    Args:
        city_code: City code used in the page URLs.

    Returns:
        None.
    """
    base_url = "http://www.weather.com.cn/weather"
    first_week_url = base_url + "/" + city_code + ".shtml"
    second_week_url = base_url + "15d" + "/" + city_code + ".shtml"

    # First page: hourly data for today plus the 7-day list.
    first_week_html = get_html_text(first_week_url)
    hourly_rows, first_week_rows = get_today_and_week(first_week_html)

    # Second page: the following days.
    second_week_html = get_html_text(second_week_url)
    second_week_rows = get_8_14_weather(second_week_html)

    fortnight_rows = first_week_rows + second_week_rows

    write_to_csv('weather14.csv', fortnight_rows, 14)
    write_to_csv('weather1.csv', hourly_rows, 1)


# Script entry point: when run directly, fetch and save the two-week forecast
# for one hard-coded example city code.
if __name__ == '__main__':
    get_two_weak_weather("101280701")