1 Import the required packages



import numpy as np
import pandas as pd
import torch
from torch import nn
import matplotlib.pyplot as plt
from torch.autograd import Variable
from torch.utils.data import DataLoader,TensorDataset
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # allow duplicate OpenMP runtimes (avoids a common crash on Windows/Anaconda setups)
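
The code below moves both the data and the model to the GPU with .cuda(), so it is worth confirming the environment first. A minimal check (my addition, not part of the original notebook):

print(torch.__version__)          # PyTorch version in use
print(torch.cuda.is_available())  # must print True, otherwise the .cuda() calls below will fail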


2 Load the dataset



data = pd.read_csv("./dataset/credit-a.csv")
X = data.iloc[:, :-1]   # all columns except the last one are features
Y = data.iloc[:, -1]    # the last column is the label
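
A quick inspection step (my addition) can confirm the assumption used later on, namely that credit-a.csv holds 15 feature columns followed by one label column taking the values -1 and 1:

print(data.shape)                       # expected: (n_samples, 16) -- 15 features + 1 label
print(data.iloc[:, -1].value_counts())  # label distribution; expected values are -1 and 1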


3 Data conversion



Y = Y.replace(-1, 0)   # replace the label -1 with 0
Y.unique()             # check how many distinct label values there are
# Convert the data to tensors
X = torch.from_numpy(X.values).type(torch.float32)
Y = torch.from_numpy(Y.values.reshape(-1, 1)).type(torch.float32)
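
A small sanity check on the converted tensors (a sketch I added, assuming the 15-feature layout described above):

print(X.shape, X.dtype)   # expected: torch.Size([n_samples, 15]) torch.float32
print(Y.shape, Y.dtype)   # expected: torch.Size([n_samples, 1])  torch.float32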


4 Build the data iterator and move the data to CUDA



def load_data(X, Y, batch_size):
    X = torch.autograd.Variable(X).cuda()   # move the features to the GPU
    Y = torch.autograd.Variable(Y).cuda()   # move the labels to the GPU
    # print("type x", X)                    # debug: inspect the tensor after moving it
    data = TensorDataset(X, Y)
    data_loader = DataLoader(data, batch_size, shuffle=True)
    # data_loader = DataLoader(data, batch_size, shuffle=False)
    return data_loader
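
To see what the loader yields, you can pull a single batch. A usage sketch (the name demo_loader is illustrative, and the shapes assume the 15-feature layout):

demo_loader = load_data(X, Y, 16)
xb, yb = next(iter(demo_loader))
print(xb.shape, yb.shape)   # expected: torch.Size([16, 15]) torch.Size([16, 1])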


5 Parameter initialization and loading the data



batch_size = 16
data_iter = load_data(X, Y, batch_size)
# Also keep full copies of X and Y on the GPU; the final evaluation (the .cpu() lines at the end) uses them
X = torch.autograd.Variable(X).cuda()
Y = torch.autograd.Variable(Y).cuda()
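
As a side note, Variable has been a no-op wrapper since PyTorch 0.4; a more current way to write the same data movement (a sketch under that assumption) is:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X = X.to(device)
Y = Y.to(device)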


6 Define the model



model = nn.Sequential()
model.add_module('mylinear1', nn.Linear(15, 1))   # 15 input features -> 1 output
model.add_module('mysigmoid', nn.Sigmoid())       # map the output to a probability in (0, 1)
if torch.cuda.is_available():
    model.cuda()
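
The Sequential above is logistic regression: one linear layer with 15 inputs followed by a sigmoid. An equivalent formulation as an nn.Module subclass (my sketch, not from the original) would be:

class LogisticRegression(nn.Module):
    def __init__(self, n_features=15):
        super().__init__()
        self.linear = nn.Linear(n_features, 1)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))

# model = LogisticRegression().cuda()   # drop-in replacement for the Sequential version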


7 Define the loss function



loss_fn = nn.BCELoss()   # binary cross-entropy on the sigmoid output
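
nn.BCELoss computes the binary cross-entropy -[y*log(p) + (1-y)*log(1-p)], averaged over the batch by default. A small check I added, showing that the manual formula agrees (the probabilities p and targets t are made-up values):

p = torch.tensor([0.9, 0.2, 0.7])
t = torch.tensor([1.0, 0.0, 1.0])
manual = -(t * torch.log(p) + (1 - t) * torch.log(1 - p)).mean()
print(loss_fn(p, t), manual)   # the two values should agree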


8 Define the optimizer



optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.001)


9 Train the model



epochs = 150
for epoch in range(epochs):
    correct = 0
    for x, y in data_iter:
        y_hat = model(x)
        loss = loss_fn(y_hat, y)
        # print(y_hat.ge(0.5))
        out = y_hat.ge(0.5).float()       # classify with a threshold of 0.5
        correct += (out == y).sum()       # count the correctly predicted samples
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print("epoch %d, current acc = %.4f" % (epoch + 1, correct.item() / Y.shape[0]))
    print("epoch %d, current loss = %.4f" % (epoch + 1, loss.item()))


10 Other



# CPU-only version (fails when the tensors live on the GPU):
# ((model(X).data.numpy() > 0.5).astype('int') == Y.numpy()).mean()
((model(X).data.cpu().numpy() > 0.5).astype('int') == Y.cpu().numpy()).mean()   # overall accuracy on the full dataset


 



model(X).data.cpu()   # bring the predicted probabilities back to the CPU for inspection
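
For this final evaluation, accessing .data is discouraged in current PyTorch; wrapping inference in torch.no_grad() is the safer equivalent. A sketch performing the same accuracy computation as above:

with torch.no_grad():
    probs = model(X)                                  # predicted probabilities, still on the GPU
    preds = (probs > 0.5).float()                     # threshold at 0.5
    accuracy = (preds == Y).float().mean().item()     # fraction of correct predictions
print(accuracy)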