数据介绍:
时间戳 总功率 用电器1 用电器3 用电器7 其他功率
任务描述:
用 时间戳 、总功率、其他功率预测用电器1、用电器3、 用电器7 的功率
一共是31天的数据(10月份),前30天作为训练,最后一天作测试。输入 时间戳 、总功率、其他功率
预测用电器1、用电器3、 用电器7 的功率 。
代码:
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
import re
import gensim
from sklearn import preprocessing
import jieba
import random
import torch
from torch import nn
import torch.utils.data as data
import torch.nn.functional as F
from torch import tensor
from sklearn.model_selection import train_test_split
from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from datetime import datetime
import time
import math
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
import os
import tensorflow as tf
from scipy import stats, integrate
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
%matplotlib inline
import random
import warnings
warnings.filterwarnings("ignore")
from datetime import datetime, timedelta
# Matplotlib defaults: SimHei lets the Chinese axis labels render, and
# turning off unicode_minus lets minus signs display correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Load the recording and keep only the columns this script uses.
# Appliance1 / Appliance3 / Appliance7 are the prediction targets.
data = pd.read_excel("house5_October.xlsx")
data = data[['Time', 'Unix', 'Aggregate', 'Appliance1', 'Appliance3', 'Appliance7', 'Others']]
data.head()

# Correlation heatmap of the numeric columns.
# plt.subplots returns a (figure, axes) pair, so unpack it instead of binding
# the tuple to `ax`. The 'Time' column is excluded explicitly because
# DataFrame.corr() no longer drops non-numeric columns silently on recent
# pandas versions.
fig, ax = plt.subplots(figsize=(10, 10))
ax = sns.heatmap(
    data.select_dtypes(include='number').corr(),
    vmax=.8,
    square=True,
    annot=True,
    ax=ax,
)
划分训练集和测试集:
# Whole table as a NumPy array. Column order follows the selection above:
# [Time, Unix, Aggregate, Appliance1, Appliance3, Appliance7, Others].
data_values = data.to_numpy()
def data_qiefen(data_time, rows=None, max_rows=10000):
    """Return the rows recorded on one day of the month, minus the time column.

    Args:
        data_time: Two-character day of month, e.g. ``"01"``. It is compared
            with characters 8-9 of ``str(row[0])``, which is the day field
            when the first column renders as ``YYYY-MM-DD ...``.
        rows: Iterable of table rows whose first element is the timestamp.
            Defaults to the module-level ``data_values``.
        max_rows: Keep at most this many matching rows (the earliest ones).
            ``None`` disables the cap.

    Returns:
        A list of lists; each inner list is a matching row with its first
        (timestamp) element removed.
    """
    if rows is None:
        rows = data_values
    day_rows = [list(row[1:]) for row in rows if str(row[0])[8:10] == data_time]
    return day_rows[:max_rows]
# Build one (inputs, targets) pair per day; only days 01-11 are used here.
# After data_qiefen drops the time column, each row is
# [Unix, Aggregate, Appliance1, Appliance3, Appliance7, Others], so:
#   inputs  -> Unix, Aggregate, Others            (indices 0, 1, 5)
#   targets -> Appliance1, Appliance3, Appliance7 (indices 2..4)
data_x = []
data_y = []
for day in ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11"]:
    day_rows = data_qiefen(day)
    data_x.append([[row[0], row[1], row[5]] for row in day_rows])
    data_y.append([row[2:5] for row in day_rows])

# Every day except the last is training data; the last day is held out.
train_x, train_y = data_x[:-1], data_y[:-1]
test_x, test_y = data_x[-1], data_y[-1]
# Plot one target series per training day as a quick visual check.
# Target rows are [Appliance1, Appliance3, Appliance7]. The legend label is
# looked up from the plotted column so the two cannot disagree (the label used
# to be fixed to '用电器1' while column 2 was the one being drawn).
plot_column = 2
plot_label = ['用电器1', '用电器3', '用电器7'][plot_column]
colors1 = '#00CED4'  # line colour
for day_targets in train_y:
    # Only samples 2000..5999 of the day are shown.
    y1 = [row[plot_column] for row in day_targets[2000:6000]]
    if not y1:
        # Fewer than 2000 samples on this day: nothing to draw, and max()
        # below would fail on an empty list.
        continue
    fig = plt.figure(facecolor='white', figsize=(5, 5))
    plt.xlabel('时间节点')
    plt.ylabel('功率')
    plt.xlim(xmax=len(y1), xmin=0)
    plt.ylim(ymax=max(y1), ymin=0)
    x1 = list(range(len(y1)))
    plt.plot(x1, y1, c=colors1, alpha=0.5, label=plot_label)
    plt.legend()
    plt.show()
# 搭建模型
#### model structure ####
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a sequence-first tensor.

    The table is stored as a buffer ``pe`` of shape ``(max_len, 1, d_model)``
    so it broadcasts over the batch axis of a ``(seq_len, batch, d_model)``
    input. Even feature columns hold sines and odd columns hold cosines.

    Args:
        d_model: Size of the last (feature) dimension of the input.
        max_len: Number of positions precomputed; it must be at least the
            length of the first dimension of any input passed to ``forward``.
    """

    def __init__(self, d_model, max_len=10000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        # One frequency per (sin, cos) column pair, hence a step of 2.
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        # With an odd d_model there is one fewer odd column than even columns,
        # so trim the frequency vector to match.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        pe = pe.unsqueeze(0).transpose(0, 1)  # -> (max_len, 1, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus the encodings for its first ``x.size(0)`` positions."""
        return x + self.pe[:x.size(0), :]
class transformer_encode_gru(nn.Module):
    """Transformer encoder followed by a bidirectional LSTM and a linear head.

    The input is sequence-first, ``(seq_len, batch, feature_size)``. The
    recurrent layer is stored as ``self.gru`` but is an ``nn.LSTM`` with hidden
    size 3 in each direction, so it emits 6 features that the linear layer
    maps to 3 outputs per time step. ``forward`` squeezes dimension 1, which
    removes the batch axis only when the batch size is 1.

    Args:
        feature_size: Width of each input time step; used as the transformer
            ``d_model`` and as the LSTM input size.
        num_layers: Number of stacked transformer encoder layers.
        dropout: Dropout probability inside each encoder layer.
    """

    def __init__(self, feature_size=3, num_layers=2, dropout=0.1):
        super(transformer_encode_gru, self).__init__()
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(feature_size)
        # Honour the constructor arguments; these values used to be
        # hard-coded to the same numbers as the defaults.
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=feature_size, nhead=1, dropout=dropout
        )
        self.transformer_encoder = nn.TransformerEncoder(
            self.encoder_layer, num_layers=num_layers
        )
        self.linear = nn.Linear(6, 3)
        self.init_weights()  # initialise the linear head
        self.src_key_padding_mask = None
        # Bidirectional, sequence-first (batch_first is left at its default).
        self.gru = nn.LSTM(feature_size, 3, num_layers=1, bidirectional=True)
        self.relu = F.relu

    def init_weights(self):
        """Zero the linear bias and draw its weights uniformly from [-0.1, 0.1]."""
        initrange = 0.1
        self.linear.bias.data.zero_()
        self.linear.weight.data.uniform_(-initrange, initrange)

    def forward(self, src):
        """Run the encoder, LSTM and linear head on ``src``.

        Args:
            src: Tensor of shape ``(seq_len, batch, feature_size)``.

        Returns:
            The linear head applied to the LSTM output after ``squeeze(1)``;
            for batch size 1 this has shape ``(seq_len, 3)``.
        """
        # All-False key padding mask of shape (batch, seq_len): no position is
        # masked. It is created on the input's own device so the module does
        # not rely on a global `device` variable.
        src_key_padding_mask = torch.zeros(
            src.size(1), src.size(0), dtype=torch.bool, device=src.device
        )
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, self.src_mask, src_key_padding_mask)
        output = self.relu(output)
        gru_out, _ = self.gru(output, None)
        gru_out = gru_out.squeeze(1)
        output = self.relu(gru_out)
        return self.linear(output)
def mape(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Entries whose true value is exactly zero are left out of the mean, since
    the percentage error is undefined there and would otherwise turn the
    whole result into ``inf`` or ``nan``.

    Args:
        y_true: Array-like of reference values.
        y_pred: Array-like of predictions, broadcastable against ``y_true``.

    Returns:
        ``mean(|y_pred - y_true| / |y_true|) * 100`` over the non-zero
        targets, or ``nan`` when every target is zero (or the input is empty).
    """
    y_true, y_pred = np.broadcast_arrays(
        np.asarray(y_true, dtype=float), np.asarray(y_pred, dtype=float)
    )
    valid = y_true != 0
    if not valid.any():
        return float("nan")
    return np.mean(np.abs((y_pred[valid] - y_true[valid]) / y_true[valid])) * 100
def test(model):
    """Evaluate ``model`` on the held-out day.

    Reads the module-level ``test_x``, ``test_y``, ``device`` and
    ``criterion``. Inputs and targets are each passed through
    ``preprocessing.scale`` before being fed to the model.

    The model is switched to eval mode for the forward pass so dropout does
    not perturb the reported metrics, and its previous train/eval mode is
    restored before returning.

    Args:
        model: The network to evaluate.

    Returns:
        A ``(loss, mape_loss)`` tuple: the ``criterion`` value as a Python
        float and the MAPE between scaled targets and outputs.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            inputs = torch.tensor(preprocessing.scale(test_x)).to(device).float()
            targets = torch.tensor(preprocessing.scale(test_y)).to(device).float()
            # (seq_len, features) -> (seq_len, 1, features): one sequence per batch.
            inputs = inputs.unsqueeze(0).transpose(0, 1)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            mape_loss = mape(
                targets.cpu().detach().numpy(), outputs.cpu().detach().numpy()
            )
    finally:
        model.train(was_training)
    return loss.item(), mape_loss
import copy

# Training setup.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = transformer_encode_gru().to(device)
epochs = 50
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)
criterion = torch.nn.MSELoss().to(device)

# Per-epoch history, consumed by the loss plot further down.
val_mape_loss = []
val_mse_loss = []
train_loss = []
best_test_loss = 100000

for epoch in tqdm(range(epochs)):
    model.train()
    train_epoch_loss = []
    # One optimisation step per training day; each day is a single sequence.
    for day_inputs, day_targets in zip(train_x, train_y):
        # Clear gradients left over from the previous step; without this,
        # backward() keeps adding to them and every update uses the sum of
        # all gradients seen so far.
        optimizer.zero_grad()
        inputs = torch.tensor(preprocessing.scale(day_inputs)).to(device).float()
        targets = torch.tensor(preprocessing.scale(day_targets)).to(device).float()
        # (seq_len, features) -> (seq_len, 1, features)
        inputs = inputs.unsqueeze(0).transpose(0, 1)
        outputs = model(inputs)
        loss = criterion(outputs.float(), targets.float())
        print("loss:", loss)
        loss.backward()
        optimizer.step()
        train_epoch_loss.append(loss.item())

    test_mse_loss, test_mape_loss = test(model)
    val_mse_loss.append(test_mse_loss)
    val_mape_loss.append(test_mape_loss)
    epoch_train_loss = np.mean(np.array(train_epoch_loss))
    train_loss.append(epoch_train_loss)
    if test_mse_loss < best_test_loss:
        best_test_loss = test_mse_loss
        print("best_test_loss", best_test_loss)
        # Snapshot the weights: a plain `best_model = model` only aliases the
        # live model, which keeps changing in later epochs.
        best_model = copy.deepcopy(model)
    print("np.mean(np.array(train_epoch_loss))", epoch_train_loss, " test_mse_loss", test_mse_loss, "----------")
# To persist the best weights: torch.save(best_model.state_dict(), 'best_model.pth')
# Scatter the per-epoch training loss and held-out MSE against epoch index.
x1 = list(range(len(train_loss)))
y1 = train_loss
x2 = list(range(len(val_mse_loss)))
y2 = val_mse_loss
# The MAPE history is prepared as well but is not drawn.
x3 = list(range(len(val_mape_loss)))
y3 = val_mape_loss
colors1 = '#00CED4'
colors2 = '#DC143C'
colors3 = '#DC1414'
area = np.pi * 4 ** 1  # marker area
for xs, ys, colour, name in (
    (x1, y1, colors1, 'train_loss'),
    (x2, y2, colors2, 'val_mse_loss'),
):
    plt.scatter(xs, ys, s=area, c=colour, alpha=0.4, label=name)
plt.legend()
plt.show()
# Final evaluation on the held-out day.
# NOTE: this uses `model` as left by the last training epoch; saved weights
# could be loaded first with model.load_state_dict(torch.load('best_model.pth')).
model.eval()
# Evaluation pairs model.eval() with torch.no_grad().
with torch.no_grad():
    inputs = torch.tensor(preprocessing.scale(test_x)).to(device).float()
    targets = torch.tensor(preprocessing.scale(test_y)).to(device).float()
    # (seq_len, features) -> (seq_len, 1, features)
    inputs = inputs.unsqueeze(0).transpose(0, 1)
    outputs = model(inputs)
    print(outputs.shape)

# Copy to host memory before converting: Tensor.numpy() raises for tensors
# that live on a CUDA device.
predicted = outputs.cpu().numpy()
actual = targets.cpu().numpy()

# Output columns follow the target order Appliance1, Appliance3, Appliance7,
# so label each figure with the appliance id rather than the column index.
appliance_ids = [1, 3, 7]
colors1 = 'r'  # prediction
colors2 = 'b'  # ground truth
for i, appliance_id in enumerate(appliance_ids):
    y1 = predicted[:, i]
    y2 = actual[:, i]
    fig = plt.figure(facecolor='white', figsize=(10, 7))
    plt.xlabel('时间节点')
    plt.ylabel('功率')
    x1 = list(range(len(y1)))
    x2 = list(range(len(y2)))
    plt.plot(x1, y1, c=colors1, alpha=0.5, label='用电器' + str(appliance_id) + '预测功率')
    plt.plot(x2, y2, c=colors2, alpha=0.5, label='用电器' + str(appliance_id) + '真实功率')
    plt.legend()
    plt.show()