Note: the original was written in Jupyter and manually converted to Markdown.


Table of Contents

  • I. Task 1: Basic PyTorch operations
  • 1.1 Task description
  • 1.2 Approach and code
  • 1.3 Analysis of results
  • II. Task 2: Implementing logistic regression
  • 2.1 Logistic regression from scratch
  • 2.2 Logistic regression with torch.nn
  • III. Task 3: Implementing softmax
  • 3.1 Softmax from scratch
  • 3.2 Softmax with torch.nn
  • A1 Reflections
  • A2 References


I. Task 1: Basic PyTorch operations

1.1 Task description

See the lab requirements.

1.2 Approach and code

A straightforward implementation is enough.

import torch
import numpy as np

M = torch.rand(1, 3)
N = torch.rand(2, 1)
print("M", M)
print("N", N)
print("M - N", M - N)
print("M.subtract(N)", M.subtract(N))
print("torch.subtract(M, N)", torch.subtract(M, N))

1.3 Analysis of results

The first way uses the Tensor class's overloaded - operator directly.
The second uses the method Tensor.subtract.
The third uses the top-level function torch.subtract.
Broadcasting occurs during the computation: M (1*3) and N (2*1) are both expanded, so all three end up as an element-wise subtraction between 2*3 matrices.
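To make the broadcasting explicit, here is a minimal sketch (the names M_expanded and N_expanded are my own) that performs the expansion by hand with Tensor.expand and checks that it matches the broadcast result:

M_expanded = M.expand(2, 3)        # repeat the single row twice
N_expanded = N.expand(2, 3)        # repeat the single column three times
manual = M_expanded - N_expanded   # plain element-wise subtraction, no broadcasting involved
print(torch.equal(manual, M - N))  # True: broadcasting performs exactly this expansion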

P = torch.normal(0, 0.01, size=(3, 2))
Q = torch.normal(0, 0.01, size=(4, 2))
print("P",P)
print('Q',Q)
print("the inverse of Q")
print(Q.T)
print("P dot Q")
P@Q.T
x = torch.ones(1, requires_grad=True)
y_1 = x * x
y_2 = (x*x*x).detach()
y_3 = y_2 + y_1
y_3.backward()
x.grad

Calling detach on x^3 means that, as far as y_3 is concerned, y_2 is a constant whose gradient is not computed; only y_1 = x^2 participates in the backward graph, so x.grad is 2 (the derivative of x^2 at x = 1).
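As a quick cross-check (a small sketch of my own, not part of the assignment), running the same computation without detach gives a different gradient, because y_2 = x^3 then also contributes its derivative 3x^2:

x2 = torch.ones(1, requires_grad=True)
y = x2 * x2 + x2 * x2 * x2   # no detach: both terms stay in the computation graph
y.backward()
print(x2.grad)               # tensor([5.]) = 2x + 3x^2 at x = 1, versus tensor([2.]) with detach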

II. Task 2: Implementing logistic regression

2.1 Logistic regression from scratch

# logistic regression from scratch

import torch
import numpy as np

def mylogistics(X):
    z =  X.matmul(w.T) + b
    return torch.sigmoid(z)
    # result = torch.where(f>0.5, 1.0, 0.0)


loss = torch.nn.BCEWithLogitsLoss()  # unused: the training call below passes torch.nn.BCELoss instead (see A1)

def mySGD(params, lr):
    with torch.no_grad():
        for param in params:
            param -= lr*param.grad
            param.grad.zero_()

x = torch.randint(-100,100, size=(100, 2), dtype=torch.float32)
label = []
for i in range(x.shape[0]):
    if x[i, 0] < x[i, 1]:
        label.append(1.0)
    else:
        label.append(0.0)

y = torch.tensor(label)
y = y.reshape((-1, 1))
print(x.shape)
print(y.shape)
def acc_logistics(y_hat, y):
    result = torch.where(y_hat>=0.5, 1, 0)
    return (result == y).type(y_hat.dtype).float().sum() / len(y)
y_hat_tem = torch.tensor([0.3, 0.9])
y_tem = torch.tensor([1, 0])
print(acc_logistics(y_hat_tem, y_tem).item())
def train_logistics_with_nothing(epochs, net, loss, lr, x, y, w, b):
    for epoch in range(epochs):
        y_hat = net(x)
        # print(y_hat)
        # print(y_hat.shape)
        acc = acc_logistics(y_hat, y)
        # loss is passed in as a class (e.g. torch.nn.BCELoss), so instantiate it before calling
        l = loss()(y_hat, y)
        l.backward()
        mySGD([w, b], lr)
        if ((epoch+1) % 100==0):
            print("epoch", epoch, " loss is", l.item(), " acc is", acc.item())
x = torch.randint(-100,100, size=(100, 2), dtype=torch.float32)
label = []
for i in range(x.shape[0]):
    if x[i, 0] < x[i, 1]:
        label.append(1.0)
    else:
        label.append(0.0)

w = torch.normal(0, 0.01, size=(1, x.shape[1]), dtype=torch.float32, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

y = torch.tensor(label)
y = y.reshape((-1, 1))
print(x.shape)
print(y.shape)

epochs = 1000
lr = 0.001
net = mylogistics

train_logistics_with_nothing(epochs, mylogistics, torch.nn.BCELoss, lr, x, y, w, b)

2.2 Logistic regression with torch.nn

# logistic regression with torch.nn
# Here a new class is defined by subclassing torch.nn.Module; in fact an nn.Linear layer plus a sigmoid would be enough
# (see the sketch after this section's code). BCEWithLogitsLoss even comes with a built-in sigmoid of its own.

import torch
import torch.nn as nn

# functions

class myLogisticsWithNN(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, X):
        z = X.matmul(w.T) + b   # uses the global parameters w and b defined below
        return torch.sigmoid(z)

def acc_logistics(y_hat, y):
    result = torch.where(y_hat>=0.5, 1, 0)
    return (result == y).type(y_hat.dtype).float().sum() / len(y)


def mySGD(params, lr):
    with torch.no_grad():
        for param in params:
            param -= lr*param.grad
            param.grad.zero_()


def train_logistics_with_nn(epochs, net, loss, lr, x, y, w, b):
    for epoch in range(epochs):
        y_hat = net()(x)  # instantiate the module and call it; __call__ dispatches to forward
        # print(y_hat)
        # print(y_hat.shape)
        acc = acc_logistics(y_hat, y)
        # loss is passed in as a class (e.g. nn.BCELoss), so instantiate it before calling
        l = loss()(y_hat, y)
        l.backward()
        mySGD([w, b], lr)
        if ((epoch+1) % 100==0):
            print("epoch", epoch, " loss is", l.item(), " acc is", acc.item())
# random data initialization and training

x = torch.randint(-100,100, size=(100, 2), dtype=torch.float32)
label = []
for i in range(x.shape[0]):
    if x[i, 0] < x[i, 1]:
        label.append(1.0)
    else:
        label.append(0.0)

w = torch.normal(0, 0.01, size=(1, x.shape[1]), dtype=torch.float32, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

y = torch.tensor(label)
y = y.reshape((-1, 1))
print(x.shape)
print(y.shape)

epochs = 1000
lr = 0.001
# net = mylogistics  # leftover from 2.1; unused in this section

train_logistics_with_nn(epochs, myLogisticsWithNN, nn.BCELoss, lr, x, y, w, b)
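As noted in the comment at the top of this section, the same model can be built more simply from a built-in nn.Linear layer followed by a sigmoid and trained with an off-the-shelf optimizer. The following is a minimal sketch of that idea under my own naming (simple_net, bce, and optimizer are not from the assignment); it reuses the x, y, and acc_logistics defined above:

simple_net = nn.Sequential(nn.Linear(2, 1), nn.Sigmoid())   # linear layer + sigmoid, outputs probabilities
bce = nn.BCELoss()                                          # probabilities in, so plain BCELoss
optimizer = torch.optim.SGD(simple_net.parameters(), lr=0.001)

for epoch in range(1000):
    y_hat = simple_net(x)
    l = bce(y_hat, y)
    optimizer.zero_grad()
    l.backward()
    optimizer.step()
    if (epoch + 1) % 100 == 0:
        print("epoch", epoch, "loss", l.item(), "acc", acc_logistics(y_hat, y).item())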

III. Task 3: Implementing softmax

3.1 Softmax from scratch

# softmax from scratch
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision import transforms
import numpy as np
minst_train = torchvision.datasets.FashionMNIST(root="./Datasets/FashionMNIST", train=True, download=True, transform=transforms.ToTensor())
minst_test = torchvision.datasets.FashionMNIST(root="./Datasets/FashionMNIST", train=False, download=True, transform=transforms.ToTensor())

batch_size = 256
train_iter = DataLoader(minst_train, batch_size=batch_size, shuffle=True)
test_iter = DataLoader(minst_test, batch_size=batch_size, shuffle=False)
num_inputs = 784 #image of fashion minst is 28*28
num_outputs = 10 # and the num of class is 10

W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)
def softmax(X):
    X_exp = torch.exp(X)
    return X_exp / X_exp.sum(1, keepdim=True)
def net(X):
    return softmax(torch.matmul(X.reshape((-1, W.shape[0])), W) + b)
def cross_entropy(y_hat, y):
    return (- torch.log(y_hat[range(len(y_hat)), y])).mean()
# mean() reduces the per-sample losses to a scalar
# a Python for-loop would be slow; the fancy indexing y_hat[range(len(y_hat)), y] picks each sample's
# probability for its true class in one shot, then we take the log (a tiny demonstration follows this cell)
def accuracy(y_hat, y):
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    result = y_hat.type(y.dtype) == y
    # == is sensitive to dtype, so convert y_hat with tensor.type() to match y before comparing
    return float(result.type(y.dtype).sum())/len(y)
def mysgd(params, lr, batch_size):
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()
def eval_acc(net, data_iter):
    result = []
    with torch.no_grad():
        for X, y in data_iter:
            result.append(accuracy(net(X), y))
    return np.array(result).mean()
# def train_softmax(net, train_iter, loss, updater):
updater = torch.optim.SGD([W, b], lr=0.001)
epochs = 10
for epoch in range(epochs):
    for X, y in train_iter:
        # print(X.shape)
        # print(X)
        # print(y.shape)
        y_hat = net(X)
        # print(y_hat)
        # print(y)
        l = cross_entropy(y_hat, y)
        # print(l)
        updater.zero_grad()
        l.backward()
        updater.step()
    print("epoch", epoch, "train loss", l.item(), "train acc", accuracy(y_hat, y)

3.2 Softmax with torch.nn

# This follows the trick from section 3.7, "Concise Implementation of Softmax Regression", in Li Mu's (李沐学AI) Dive into Deep Learning: the logsumexp trick, which effectively performs the softmax inside the cross-entropy loss and thereby avoids overflow and underflow, instead of implementing a new softmax class with nn.Module as above (a small illustration follows this section's code). By the way, this method from Li Mu's book is really elegant.

import torch
from torch import nn
import torchvision
from torch.utils.data import DataLoader
from torchvision import transforms
import numpy as np
minst_train = torchvision.datasets.FashionMNIST(root="./Datasets/FashionMNIST", train=True, download=True, transform=transforms.ToTensor())
minst_test = torchvision.datasets.FashionMNIST(root="./Datasets/FashionMNIST", train=False, download=True, transform=transforms.ToTensor())

batch_size = 256
train_iter = DataLoader(minst_train, batch_size=batch_size, shuffle=True)
test_iter = DataLoader(minst_test, batch_size=batch_size, shuffle=False)

loss1 = torch.nn.CrossEntropyLoss(reduction='none')
net1 = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(784, 10))
updater1 = torch.optim.SGD(net1.parameters(), lr=0.1)

def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)

net1.apply(init_weights)

epochs = 10
def train_softmax_with_nn(epochs):
    for epoch in range(epochs):
        for X, y in train_iter:
            y_hat = net1(X)
            # print(y_hat)
            l = loss1(y_hat, y)
            updater1.zero_grad()
            l.mean().backward()
            updater1.step()
        print("epoch", epoch, "train loss", l.mean().item(), "train acc", accuracy(y_hat, y), "test_acc", eval_acc(net, test_iter))
train_softmax_with_nn(epochs)
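To see why the logsumexp trick mentioned in the comment above matters, here is a minimal sketch (not part of the assignment): the naive softmax from 3.1 overflows for large logits, while subtracting the per-row maximum first, which is essentially what the trick inside nn.CrossEntropyLoss accomplishes, stays finite and gives the same probabilities.

logits = torch.tensor([[10.0, 100.0, 1000.0]])
naive = torch.exp(logits) / torch.exp(logits).sum(1, keepdim=True)
print(naive)    # contains nan, because exp(1000) overflows to inf

shifted = logits - logits.max(1, keepdim=True).values   # softmax is invariant to subtracting the row max
stable = torch.exp(shifted) / torch.exp(shifted).sum(1, keepdim=True)
print(stable)   # tensor([[0., 0., 1.]]) up to rounding, no overflow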

A1 Reflections

When passing a callable around, sometimes you should pass the class or function itself, and other times you should construct an object first and pass the instance; this is very easy to get wrong. With BCELoss, for example, I started by simply passing the wrong one and got an error along the lines of "more than one object to compare", which with my limited experience was almost impossible to approach. Fortunately, with a classmate's help I finally located the mistake; a small sketch of the distinction follows.
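A minimal sketch of that distinction, using nn.BCELoss as the example:

import torch
from torch import nn

loss_cls = nn.BCELoss     # the class itself: loss_cls(y_hat, y) hands the tensors to the constructor
                          # instead of computing a loss, which is what produces the confusing errors
loss_fn = nn.BCELoss()    # an instance: loss_fn(y_hat, y) actually computes the loss
print(loss_fn(torch.tensor([0.8]), torch.tensor([1.0])).item())   # ~0.223 = -log(0.8)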
At first, the from-scratch logistic regression used BCEWithLogitsLoss, and the loss never decreased even though the accuracy was close to 1.0. After checking the documentation I learned that BCEWithLogitsLoss has a built-in sigmoid and is meant to be applied directly to the output of a Linear layer, so feeding it sigmoid outputs applies the sigmoid twice. I therefore switched to BCELoss.
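A small numerical sketch of that point (reusing the imports from the previous sketch; the example values are made up): BCEWithLogitsLoss expects raw logits and applies the sigmoid internally, so it matches BCELoss on sigmoid outputs, while feeding it already-sigmoided values gives a different, wrong loss.

logits = torch.tensor([0.5, -1.2, 2.0])
target = torch.tensor([1.0, 0.0, 1.0])
print(nn.BCEWithLogitsLoss()(logits, target).item())                  # applies sigmoid internally
print(nn.BCELoss()(torch.sigmoid(logits), target).item())             # same value
print(nn.BCEWithLogitsLoss()(torch.sigmoid(logits), target).item())   # different: sigmoid applied twice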
In the softmax regression, the accuracy function is implemented neatly through dtype conversion: converting the bool comparison result to float makes it possible to count the correct classifications directly with a sum.