Note: this write-up was originally a Jupyter notebook and was converted to Markdown by hand.
Contents
- Task 1: Basic PyTorch operations
- 1.1 Task description
- 1.2 Approach and code
- 1.3 Analysis of results
- Task 2: Implementing logistic regression
- 2.1 Logistic regression from scratch
- 2.2 Logistic regression with torch.nn
- Task 3: Implementing softmax
- 3.1 Softmax from scratch
- 3.2 Softmax with torch.nn
- A1 Lessons learned
- A2 References
Task 1: Basic PyTorch operations
1.1 Task description
See the lab handout for the requirements.
1.2 Approach and code
A straightforward implementation is sufficient.
```python
import torch
import numpy as np

M = torch.rand(1, 3)
N = torch.rand(2, 1)
print("M", M)
print("N", N)
print("M - N", M - N)
print("M.subtract(N)", M.subtract(N))
print("torch.subtract(M, N)", torch.subtract(M, N))
```
1.3 Analysis of results
- The first form calls the Tensor class's overloaded `-` operator directly.
- The second is a Tensor method, `M.subtract(N)`.
- The third is a free function in the torch namespace, `torch.subtract(M, N)`.

In every case broadcasting kicks in: M (shape 1×3) and N (shape 2×1) are both expanded to shape 2×3, and the subtraction is carried out element-wise.
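To make the broadcasting explicit, here is a small sketch of my own (not part of the assignment code) that expands both tensors to 2×3 by hand and checks that the result matches the broadcast subtraction:

```python
import torch

M = torch.rand(1, 3)
N = torch.rand(2, 1)

# Broadcasting stretches the size-1 dimensions, so both operands
# behave as if they had shape (2, 3).
M_full = M.expand(2, 3)
N_full = N.expand(2, 3)

assert torch.allclose(M - N, M_full - N_full)
print((M - N).shape)  # torch.Size([2, 3])
```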
```python
P = torch.normal(0, 0.01, size=(3, 2))
Q = torch.normal(0, 0.01, size=(4, 2))
print("P", P)
print("Q", Q)
print("the transpose of Q")  # Q is 4x2, so it is not square and has no inverse; Q.T is its transpose
print(Q.T)
print("P @ Q.T")  # (3, 2) @ (2, 4) -> (3, 4)
print(P @ Q.T)
```
```python
x = torch.ones(1, requires_grad=True)
y_1 = x * x
y_2 = (x * x * x).detach()  # cut x**3 out of the computation graph
y_3 = y_2 + y_1
y_3.backward()
print(x.grad)
```
Using detach() on x^3 means that, as far as y_3 is concerned, y_2 is a constant and no gradient is taken through it. Only y_1 = x^2 takes part in the backward pass, so x.grad = d(y_1)/dx = 2x = 2.
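As a sanity check (my own addition, not in the original notebook), running the same computation without detach() gives the full gradient 2x + 3x^2 = 5 at x = 1:

```python
import torch

x = torch.ones(1, requires_grad=True)
y = x * x + x * x * x   # no detach: both terms stay in the graph
y.backward()
print(x.grad)           # tensor([5.]) = 2*x + 3*x**2 at x = 1
```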
Task 2: Implementing logistic regression
2.1 Logistic regression from scratch
```python
# Logistic regression from scratch
import torch
import numpy as np

def mylogistics(X):
    z = X.matmul(w.T) + b  # w, b are module-level parameters defined below
    return torch.sigmoid(z)
    # result = torch.where(f > 0.5, 1.0, 0.0)

# Leftover from an earlier attempt; BCEWithLogitsLoss applies its own sigmoid,
# so it is not used below -- BCELoss is passed into the training function instead.
loss = torch.nn.BCEWithLogitsLoss()

def mySGD(params, lr):
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad
            param.grad.zero_()

# Synthetic data: label is 1.0 when x0 < x1, otherwise 0.0
x = torch.randint(-100, 100, size=(100, 2), dtype=torch.float32)
label = []
for i in range(x.shape[0]):
    if x[i, 0] < x[i, 1]:
        label.append(1.0)
    else:
        label.append(0.0)
y = torch.tensor(label)
y = y.reshape((-1, 1))
print(x.shape)
print(y.shape)

def acc_logistics(y_hat, y):
    result = torch.where(y_hat >= 0.5, 1, 0)
    return (result == y).type(y_hat.dtype).float().sum() / len(y)

y_hat_tem = torch.tensor([0.3, 0.9])
y_tem = torch.tensor([1, 0])
print(acc_logistics(y_hat_tem, y_tem).item())

def train_logistics_with_nothing(epochs, net, loss, lr, x, y, w, b):
    for epoch in range(epochs):
        y_hat = net(x)
        acc = acc_logistics(y_hat, y)
        # loss is a class (e.g. torch.nn.BCELoss), so instantiate it before calling
        l = loss()(y_hat, y)
        l.backward()
        mySGD([w, b], lr)
        if (epoch + 1) % 100 == 0:
            print("epoch", epoch, " loss is", l.item(), " acc is", acc.item())

# Re-generate the data, initialize the parameters, and train
x = torch.randint(-100, 100, size=(100, 2), dtype=torch.float32)
label = []
for i in range(x.shape[0]):
    if x[i, 0] < x[i, 1]:
        label.append(1.0)
    else:
        label.append(0.0)
w = torch.normal(0, 0.01, size=(1, x.shape[1]), dtype=torch.float32, requires_grad=True)
b = torch.zeros(1, requires_grad=True)
y = torch.tensor(label)
y = y.reshape((-1, 1))
print(x.shape)
print(y.shape)
epochs = 1000
lr = 0.001
net = mylogistics
train_logistics_with_nothing(epochs, mylogistics, torch.nn.BCELoss, lr, x, y, w, b)
```
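Since the labels are generated by the rule label = 1.0 whenever x0 < x1, the true decision boundary is x1 - x0 = 0, so after training one would roughly expect w0 < 0 < w1 and b ≈ 0. A small check along those lines (my own addition, reusing the w and b trained above):

```python
# Hypothetical post-training check: the generating rule is "label = 1 iff x0 < x1",
# so sigmoid(w0*x0 + w1*x1 + b) should be above 0.5 exactly when x0 < x1.
print("learned w:", w.detach().numpy(), "learned b:", b.item())

x_new = torch.tensor([[-5.0, 5.0],    # x0 < x1 -> expect a probability above 0.5
                      [10.0, -2.0]])  # x0 > x1 -> expect a probability below 0.5
with torch.no_grad():
    probs = torch.sigmoid(x_new.matmul(w.T) + b)
print("predicted probabilities:", probs.squeeze().tolist())
```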
2.2 Logistic regression with torch.nn
Here I subclass torch.nn.Module to write a new class, although a single nn.Linear layer followed by a sigmoid would already be enough. In fact, BCEWithLogitsLoss appears to include a sigmoid of its own, so it can be paired with a bare Linear layer and no explicit sigmoid at all (see the sketch after this section's code).
```python
import torch
import torch.nn as nn

class myLogisticsWithNN(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, X):
        # uses the module-level w and b defined below rather than nn.Parameter
        z = X.matmul(w.T) + b
        return torch.sigmoid(z)

def acc_logistics(y_hat, y):
    result = torch.where(y_hat >= 0.5, 1, 0)
    return (result == y).type(y_hat.dtype).float().sum() / len(y)

def mySGD(params, lr):
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad
            param.grad.zero_()

def train_logistics_with_nn(epochs, net, loss, lr, x, y, w, b):
    for epoch in range(epochs):
        y_hat = net()(x)  # instantiate the module, then call it on x
        acc = acc_logistics(y_hat, y)
        # loss is a class (nn.BCELoss), so instantiate it before calling
        l = loss()(y_hat, y)
        l.backward()
        mySGD([w, b], lr)
        if (epoch + 1) % 100 == 0:
            print("epoch", epoch, " loss is", l.item(), " acc is", acc.item())

# Random data initialization and training
x = torch.randint(-100, 100, size=(100, 2), dtype=torch.float32)
label = []
for i in range(x.shape[0]):
    if x[i, 0] < x[i, 1]:
        label.append(1.0)
    else:
        label.append(0.0)
w = torch.normal(0, 0.01, size=(1, x.shape[1]), dtype=torch.float32, requires_grad=True)
b = torch.zeros(1, requires_grad=True)
y = torch.tensor(label)
y = y.reshape((-1, 1))
print(x.shape)
print(y.shape)
epochs = 1000
lr = 0.001
train_logistics_with_nn(epochs, myLogisticsWithNN, nn.BCELoss, lr, x, y, w, b)
```
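As noted above, an arguably simpler formulation keeps the model as a bare nn.Linear layer and lets nn.BCEWithLogitsLoss apply the sigmoid internally, which also sidesteps the loss-not-decreasing issue described in the lessons-learned section. A minimal sketch, assuming the same x and y generated above (net2 and the hyperparameters are my own choices):

```python
import torch
from torch import nn

net2 = nn.Linear(2, 1)                 # produces raw logits; no sigmoid in the model
loss_fn = nn.BCEWithLogitsLoss()       # sigmoid + binary cross-entropy in one numerically stable op
optimizer = torch.optim.SGD(net2.parameters(), lr=0.001)

for epoch in range(1000):
    logits = net2(x)                   # x, y: the synthetic data generated above
    l = loss_fn(logits, y)
    optimizer.zero_grad()
    l.backward()
    optimizer.step()
    if (epoch + 1) % 100 == 0:
        with torch.no_grad():
            acc = ((torch.sigmoid(logits) >= 0.5).float() == y).float().mean()
        print("epoch", epoch, "loss", l.item(), "acc", acc.item())
```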
Task 3: Implementing softmax
3.1 Softmax from scratch
```python
# Softmax regression from scratch
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision import transforms
import numpy as np

minst_train = torchvision.datasets.FashionMNIST(root="./Datasets/FashionMNIST", train=True, download=True, transform=transforms.ToTensor())
minst_test = torchvision.datasets.FashionMNIST(root="./Datasets/FashionMNIST", train=False, download=True, transform=transforms.ToTensor())
batch_size = 256
train_iter = DataLoader(minst_train, batch_size=batch_size, shuffle=True)
test_iter = DataLoader(minst_test, batch_size=batch_size, shuffle=False)

num_inputs = 784   # each Fashion-MNIST image is 28*28
num_outputs = 10   # and there are 10 classes
W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)

def softmax(X):
    X_exp = torch.exp(X)
    return X_exp / X_exp.sum(1, keepdim=True)

def net(X):
    return softmax(torch.matmul(X.reshape((-1, W.shape[0])), W) + b)

def cross_entropy(y_hat, y):
    # Fancy indexing picks the predicted probability of the true class for every
    # row at once (a Python for-loop would be slow); .mean() returns a scalar loss.
    return (- torch.log(y_hat[range(len(y_hat)), y])).mean()

def accuracy(y_hat, y):
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    # == is sensitive to dtype, so cast y_hat to y's dtype before comparing
    result = y_hat.type(y.dtype) == y
    return float(result.type(y.dtype).sum()) / len(y)

def mysgd(params, lr, batch_size):
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()

def eval_acc(net, data_iter):
    result = []
    with torch.no_grad():
        for X, y in data_iter:
            result.append(accuracy(net(X), y))
    return np.array(result).mean()

updater = torch.optim.SGD([W, b], lr=0.001)
epochs = 10
for epoch in range(epochs):
    for X, y in train_iter:
        y_hat = net(X)
        l = cross_entropy(y_hat, y)
        updater.zero_grad()
        l.backward()
        updater.step()
    print("epoch", epoch, "train loss", l.item(), "train acc", accuracy(y_hat, y))
```
3.2 Softmax with torch.nn
This follows the trick from section 3.7 of Li Mu's Dive into Deep Learning ("Concise Implementation of Softmax Regression"): the LogSumExp trick. The softmax is folded into the cross-entropy loss itself, which avoids overflow and underflow, instead of wrapping softmax in a new nn.Module subclass as above. By the way, this method from the book is really elegant.
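A small illustration of the numerical issue (my own example): exponentiating large logits overflows, while subtracting the per-row maximum first, which is the standard LogSumExp stabilization that the log-softmax inside nn.CrossEntropyLoss relies on, keeps everything finite.

```python
import torch

logits = torch.tensor([[1000.0, 0.0, -1000.0]])

# Naive softmax: exp(1000) overflows to inf, giving nan probabilities.
naive = torch.exp(logits) / torch.exp(logits).sum(1, keepdim=True)
print(naive)     # tensor([[nan, 0., 0.]])

# Subtracting the row-wise max leaves the softmax unchanged but stays finite.
shifted = logits - logits.max(dim=1, keepdim=True).values
stable = torch.exp(shifted) / torch.exp(shifted).sum(1, keepdim=True)
print(stable)    # tensor([[1., 0., 0.]])

# nn.CrossEntropyLoss takes raw logits and handles this stably internally.
print(torch.nn.CrossEntropyLoss()(logits, torch.tensor([0])))  # tensor(0.)
```

The actual training code, which hands raw logits straight to nn.CrossEntropyLoss, follows.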
```python
import torch
from torch import nn
import torchvision
from torch.utils.data import DataLoader
from torchvision import transforms
import numpy as np

minst_train = torchvision.datasets.FashionMNIST(root="./Datasets/FashionMNIST", train=True, download=True, transform=transforms.ToTensor())
minst_test = torchvision.datasets.FashionMNIST(root="./Datasets/FashionMNIST", train=False, download=True, transform=transforms.ToTensor())
batch_size = 256
train_iter = DataLoader(minst_train, batch_size=batch_size, shuffle=True)
test_iter = DataLoader(minst_test, batch_size=batch_size, shuffle=False)

loss1 = torch.nn.CrossEntropyLoss(reduction='none')   # takes raw logits; softmax is folded into the loss
net1 = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(784, 10))
updater1 = torch.optim.SGD(net1.parameters(), lr=0.1)

def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)

net1.apply(init_weights)

epochs = 10

def train_softmax_with_nn(epochs):
    for epoch in range(epochs):
        for X, y in train_iter:
            y_hat = net1(X)
            l = loss1(y_hat, y)
            updater1.zero_grad()
            l.mean().backward()
            updater1.step()
        # accuracy() and eval_acc() are reused from section 3.1
        print("epoch", epoch, "train loss", l.mean().item(), "train acc", accuracy(y_hat, y), "test_acc", eval_acc(net1, test_iter))

train_softmax_with_nn(epochs)
```
A1 Lessons learned
When passing callables around by name, sometimes the function or class itself should be passed and sometimes a new object should be constructed and passed instead; it is very easy to get this wrong. For example, with the BCE loss I initially just passed the class along and used it as if it were an instance, and the result was an error along the lines of "more than one element ...", which, with my limited experience, I had no idea how to approach. Fortunately a classmate helped me track the mistake down. The difference is sketched below.
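The distinction, in a minimal sketch (the variable names are my own): nn.BCELoss is a class, so it has to be instantiated before it can be called on tensors.

```python
import torch
from torch import nn

y_hat = torch.tensor([[0.7], [0.2]])
y = torch.tensor([[1.0], [0.0]])

loss_cls = nn.BCELoss        # the class itself (what gets passed into the training function)
l1 = loss_cls()(y_hat, y)    # instantiate first, then call -- works

loss_fn = nn.BCELoss()       # an already-constructed instance
l2 = loss_fn(y_hat, y)       # call it directly -- also works

# loss_cls(y_hat, y)         # wrong: this constructs BCELoss(weight=y_hat, ...) instead of computing a loss
print(l1.item(), l2.item())
```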
At first, the from-scratch logistic regression used BCEWithLogitsLoss, and the loss never decreased even though the accuracy was close to 1.0. After checking the documentation I learned that BCEWithLogitsLoss applies a sigmoid of its own and is meant to be used directly on the output of a Linear layer; since my model already ended with a sigmoid (so the sigmoid was presumably being applied twice), I switched to BCELoss.
In the softmax regression, dtype conversion made the accuracy function very clean: converting the boolean comparison result to float and summing it directly counts the correct classifications.
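A toy illustration of that bool-to-float counting (the numbers are my own):

```python
import torch

y_hat = torch.tensor([[0.1, 0.6, 0.3],
                      [0.8, 0.1, 0.1],
                      [0.2, 0.2, 0.6]])
y = torch.tensor([1, 2, 2])

pred = y_hat.argmax(axis=1)             # tensor([1, 0, 2])
correct = (pred == y)                   # tensor([True, False, True])
acc = correct.float().sum() / len(y)    # bool -> float, then sum counts the hits
print(acc.item())                       # 0.666...
```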