从“天气史”网站爬取某地区在指定时间范围内每天的逐小时天气数据。
网站原始数据
https://www.tianqishi.com/hangzhou/20240214.html
源码
import csv
import datetime

import requests
from bs4 import BeautifulSoup
def getEachDayWeather(loaction, timeStamp):
    """
    Fetch the hourly weather table of one day for one location.

    Downloads ``https://www.tianqishi.com/<loaction>/<timeStamp>.html`` and
    reads the ``<table class="yuBaoTable">`` element from the page.

    Args:
        loaction: Location segment of the URL, e.g. ``"hangzhou"``. (The
            parameter name is misspelled but kept for caller compatibility.)
        timeStamp: Day segment of the URL, e.g. ``"20240214"``.

    Returns:
        A list of rows, one per table row that has ``<td>`` cells; each row
        is the list of cell texts. Returns an empty list when the page does
        not contain the expected table.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    # 1. Build the page URL
    urlHead = "https://www.tianqishi.com"
    urlFoot = ".html"
    weatherHtml = "{}/{}/{}{}".format(urlHead, loaction, timeStamp, urlFoot)

    # 2. Download the page; the timeout keeps a stalled connection from
    #    hanging the whole crawl, and error statuses are reported explicitly
    #    instead of being parsed as if they were a weather page.
    response = requests.get(weatherHtml, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")
    yuBaoTable = soup.find("table", class_="yuBaoTable")
    if yuBaoTable is None:
        # No weather table on this page: report "no data" for the day rather
        # than crashing with an AttributeError on None.
        return []

    # 3. Collect the cell texts row by row
    eachDayData = []
    for row in yuBaoTable.find_all("tr"):
        eachHourData = [cell.text for cell in row.find_all("td")]
        # Rows without any <td> (e.g. a header row built from <th>) carry no
        # data and would otherwise end up as blank lines in the CSV.
        if eachHourData:
            eachDayData.append(eachHourData)
    return eachDayData
def writeEachDayWeather(eachDayData, csvFilePath):
    """
    Append one day's weather rows to the CSV file.

    Args:
        eachDayData: Iterable of rows; each row is a sequence of cell values
            for one hour. Empty rows are skipped.
        csvFilePath: Path of the CSV file to append to. The file is created
            if it does not exist.

    Each row is written as one comma-separated line without a trailing
    comma, so the field count matches the number of cells. Cells that
    contain commas, quotes or line breaks are quoted by the ``csv`` module.
    """
    # Encoding is intentionally left at the platform default so appended rows
    # use the same encoding as whatever already wrote to this file.
    with open(csvFilePath, "a") as f:
        # lineterminator="\n" lets text-mode newline translation produce the
        # platform line ending, exactly as a plain f.write("\n") would.
        writer = csv.writer(f, lineterminator="\n")
        writer.writerows(row for row in eachDayData if row)
def writeTitle(csvFilePath):
    """
    Create (or truncate) the CSV file and write its header line.

    Args:
        csvFilePath: Path of the CSV file to (re)create.
    """
    columns = ("日期时间", "气温", "风向", "风力", "风速", "气压", "湿度", "降水概率")
    headerLine = ",".join(columns) + "\n"
    # Mode "w" discards any previous content, so the header is always line 1.
    with open(csvFilePath, "w") as out:
        out.write(headerLine)
def getCsvFilePath(rootPath, loaction, timeStampStart, timeStampEnd):
    """
    Build the output CSV path for a location and date range.

    Args:
        rootPath: Directory prefix; it is joined with a literal "/" as-is.
        loaction: Location name used in the file name.
        timeStampStart: First day of the range, used in the file name.
        timeStampEnd: Last day of the range, used in the file name.

    Returns:
        ``<rootPath>/<loaction>_<timeStampStart>_to_<timeStampEnd>.csv``.
    """
    fileName = f"{loaction}_{timeStampStart}_to_{timeStampEnd}.csv"
    return f"{rootPath}/{fileName}"
def getTimeStampList(timeStampStart, timeStampEnd, daysDelta=1):
    """
    List the days from start to end (inclusive) as ``YYYYMMDD`` strings.

    Args:
        timeStampStart: First day, formatted ``YYYYMMDD``.
        timeStampEnd: Last day, formatted ``YYYYMMDD``.
        daysDelta: Step between consecutive entries, in days (default 1).

    Returns:
        The list of ``YYYYMMDD`` strings starting at ``timeStampStart`` and
        advancing ``daysDelta`` days at a time without passing
        ``timeStampEnd``. Empty when the end precedes the start.

    Raises:
        ValueError: If a timestamp does not match ``YYYYMMDD`` or
            ``daysDelta`` is smaller than 1.
    """
    if daysDelta < 1:
        raise ValueError("daysDelta must be >= 1, got {}".format(daysDelta))

    startDate = datetime.datetime.strptime(timeStampStart, "%Y%m%d")
    endDate = datetime.datetime.strptime(timeStampEnd, "%Y%m%d")
    days = (endDate - startDate).days

    # daysDelta is the range step; a separate loop variable keeps the
    # parameter from being overwritten (which used to make it a no-op).
    return [
        (startDate + datetime.timedelta(days=offset)).strftime("%Y%m%d")
        for offset in range(0, days + 1, daysDelta)
    ]
if __name__ == "__main__":
    # Crawl configuration: location and inclusive date range (YYYYMMDD).
    loaction = "hangzhou"
    rootPath = "./"
    timeStampStart = "20231106"
    timeStampEnd = "20240204"

    timeStampList = getTimeStampList(timeStampStart, timeStampEnd)
    csvFilePath = getCsvFilePath(rootPath, loaction, timeStampStart, timeStampEnd)

    # Start from a fresh file holding only the header; days are appended below.
    writeTitle(csvFilePath)

    print("program starting.")
    for timeStamp in timeStampList:
        print("getting weather data for {}".format(timeStamp))
        # Fetch one day and append it immediately, so data already collected
        # is on disk if a later day fails.
        writeEachDayWeather(getEachDayWeather(loaction, timeStamp), csvFilePath)
    print("program finished.")
结果
上图中风力数据被显示成日期,这是 Excel 的显示问题;原始 csv 文件中的数据是正常的:
未经作者授权,禁止转载
THE END