pytorch 网络loss过大 pytorch输出网络参数

转载

mob64ca14122c74 2024-08-05 08:00:25

文章标签 pytorch 网络loss过大神经网络 python pytorch 2d 文章分类 PyTorch 人工智能

在数据集较小的情况下，可以先用大数据集对卷积神经网络进行预训练；dropout论文中提到，经过训练的卷积层是一个泛化的特征提取器。本文介绍在pytorch中如何将已训练好的网络参数导入自己设计的网络中使用。

本文以VGG16网络为例。网络架构如下（打对勾那个）：

pytorch 网络loss过大 pytorch输出网络参数_python

步骤如下：

下载网络已训练好的参数文件
参数模型下载地址：本文中下载的文件是 imagenet-vgg-verydeep-16.mat
加载所下载模型参数（可在Debug下可视化查看）

# Read the downloaded MatConvNet parameter file (requires `import scipy.io`)
# and dump the resulting dict so its nesting can be inspected.
data = scipy.io.loadmat('imagenet-vgg-verydeep-16.mat')
print(data)

输出结果

{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Fri Sep 30 08:35:50 2016', '__version__': '1.0', '__globals__': [], 'layers': array([[array([[(array(['conv1_1'], dtype='<U7'), array(['conv'], dtype='<U4'), array([[array([[[[ 4.80015397e-01, -1.72696680e-01,  3.75577137e-02, ...,
          -1.27135560e-01, -5.02991639e-02,  3.48965675e-02],
....此处省略卷积核权重数据，应为[3,3,3,64]维数据
[[-4.18516338e-01, -1.57048807e-01, -1.49133086e-01, ...,
          -1.56839803e-01, -1.42874300e-01, -2.69694626e-01],
         [-4.85030204e-01,  4.23195846e-02, -1.12076312e-01, ...,
          -1.18306056e-01, -1.67058021e-01, -3.22241962e-01],
         [-3.50096762e-01,  1.38710454e-01, -1.25339806e-01, ...,
          -1.53092295e-01, -1.39917329e-01, -2.65075237e-01]]]],
dtype=float32),
        array([[ 0.73429835],
       [ 0.09340367],
       [ 0.06775674],
       [ 0.8862966 ],
....此处省略偏置权重，为[64,1]维数据（从打印结果可见每行只有一个元素）
 [ 1.302014  ],
       [ 0.5306885 ],
       [ 0.48993504]], dtype=float32)]], dtype=object), array([[ 3,  3,  3, 64]], dtype=uint8), array([[1, 1, 1, 1]], dtype=uint8), array([[1, 1]], dtype=uint8), array([[0]], dtype=uint8), array([[1]], dtype=uint8), array([], shape=(0, 0), dtype=object))]],
      dtype=[('name', 'O'), ('type', 'O'), ('weights', 'O'), ('size', 'O'), ('pad', 'O'), ('stride', 'O'), ('precious', 'O'), ('dilate', 'O'), ('opts', 'O')]),
        array([[(array(['relu1_1'], dtype='<U7'), array(['relu'], dtype='<U4'), array([[0]], dtype=uint8), array([], shape=(0, 0), dtype=object), array([[0]], dtype=uint8))]],
....此时省略其他层数据信息，只显示了一个conv层和relu层

从输出结果可以看出所加载参数文件的数据结构：参数值以array形式存储，并且包含了整个网络的全部层（conv、relu、fc等）。当自己的网络结构与之不一致时，只需取出自己需要的那部分参数值。
3. 查看模型中的关键字信息

print(data.keys()) 
    # dict_keys(['__header__', '__version__', '__globals__', 'layers', 'classes', 'normalization'])
    # 'classes': identifiers and descriptions of the 1000 ImageNet classes
    # 'layers': 42 elements; each element is in turn a 1*2 structure holding the weight w and the bias b
    # 'normalization': normalisation parameters for the input image

加载权重

# Row 0 of data['layers'] holds one record per layer of the pretrained network.
weights = data['layers'][0]
# weights[0][0][0][0] = ['conv1_1']        -> the record's 'name' field
# weights[0][0][0][1] = ['conv']           -> the record's 'type' field
# weights[0][0][0][2][0] = kernels, bias   -> the record's 'weights' field

此处需要自行尝试weight不同索引所代表的值，上面为本文下载文件的一些参数值的获取方式
6. 使用对应层参数对自己网络进行初始化
（1）卷积层设置是一样的才可以使用，例如下载文件中第一层卷积为 torch.nn.Conv2d(3, 64,3,1,1)，若自己网络第一层卷积为torch.nn.Conv2d(1, 64,3,1,1)，则不可使用
（2）参数维度方式不同，需要进行转换
matconvnet: weights are [width, height, in_channels, out_channels]
pytorchnet：weights are [out_channels,in_channels,width, height ]
（3）文件存储使用array，而网络运行中使用Tensor

维度转换代码

# Reorder the axes so out_channels and in_channels come first, as PyTorch expects.
kernels=kernels.transpose((3,2,0,1))
# Conv2d wants a flat bias of length 64 but the loaded bias is 2-D; transposing
# and taking row 0 flattens it (presumes a column-shaped bias like the one
# printed earlier in the article — TODO confirm against the actual file).
bias=bias.transpose(1,0)[0]
# The file stores numpy arrays, while the network operates on Tensors.
kernels=torch.Tensor(kernels)
bias=torch.Tensor(bias)

完整代码

import torch
from torch.autograd import Variable
from torch.nn import init
import scipy.io
from utils_.util import read_csv
import nibabel as nib
from torch import optim

# layers = (
#         'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
#         'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
#         'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
#         'relu3_3', 'pool3',
#         'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
#         'relu4_3', 'pool4',
#         'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
#         'relu5_3')


def layer(inchannel,outchannel,kernel,stride,padding):
    """Build a convolution block: a ``Conv2d`` immediately followed by a ``ReLU``.

    All five arguments are forwarded positionally to ``torch.nn.Conv2d``.
    Returns the two modules wrapped in a ``torch.nn.Sequential``.
    """
    conv = torch.nn.Conv2d(inchannel, outchannel, kernel, stride, padding)
    activation = torch.nn.ReLU()
    return torch.nn.Sequential(conv, activation)

def fc_layer(size_in, size_out):
    """Build a fully connected block: ``Linear`` -> ``Dropout(0.5)`` -> ``ReLU``.

    ``size_in`` and ``size_out`` are the input and output feature counts of
    the ``Linear`` layer. Returns the three modules as a ``Sequential``.
    """
    stages = [
        torch.nn.Linear(size_in, size_out),
        torch.nn.Dropout(0.5),
        torch.nn.ReLU(),
    ]
    return torch.nn.Sequential(*stages)
class VGG(torch.nn.Module):
    """VGG16-style classifier whose conv layers start from MatConvNet weights.

    The network has thirteen 3x3 convolution blocks in five groups, each group
    followed by a 2x2 max-pool, then two fully connected blocks and a final
    ``Linear`` classifier. ``fc1`` is sized for ``512*16*16`` features, so the
    five poolings must leave a 16x16 map (i.e. a 512x512 input).

    Every convolution except the very first is initialised from the
    pretrained file; the first one is Kaiming-initialised because its input
    channel count may differ from the pretrained network.

    Args:
        inchannel: Number of channels of the input image.
        classes: Number of output classes.
        weight_path: Path of the MatConvNet ``.mat`` file to read the
            pretrained layers from.
    """

    def __init__(self, inchannel=1, classes=3,
                 weight_path='imagenet-vgg-verydeep-16.mat'):
        super().__init__()
        self.name = 'VGG'
        # Row 0 of 'layers' holds one record per layer of the pretrained net.
        data = scipy.io.loadmat(weight_path)
        self.weights = data['layers'][0]

        self.conv1_1 = layer(inchannel, 64, 3, 1, 1)
        self.conv1_2 = layer(64, 64, 3, 1, 1)
        self.maxpool1 = torch.nn.MaxPool2d(2, 2)

        self.conv2_1 = layer(64, 128, 3, 1, 1)
        self.conv2_2 = layer(128, 128, 3, 1, 1)
        self.maxpool2 = torch.nn.MaxPool2d(2, 2)

        self.conv3_1 = layer(128, 256, 3, 1, 1)
        self.conv3_2 = layer(256, 256, 3, 1, 1)
        self.conv3_3 = layer(256, 256, 3, 1, 1)
        self.maxpool3 = torch.nn.MaxPool2d(2, 2)

        self.conv4_1 = layer(256, 512, 3, 1, 1)
        self.conv4_2 = layer(512, 512, 3, 1, 1)
        self.conv4_3 = layer(512, 512, 3, 1, 1)
        self.maxpool4 = torch.nn.MaxPool2d(2, 2)

        self.conv5_1 = layer(512, 512, 3, 1, 1)
        self.conv5_2 = layer(512, 512, 3, 1, 1)
        self.conv5_3 = layer(512, 512, 3, 1, 1)
        self.maxpool5 = torch.nn.MaxPool2d(2, 2)

        self.fc1 = fc_layer(512 * 16 * 16, 2048)
        self.fc2 = fc_layer(2048, 1024)
        self.fc3 = torch.nn.Linear(1024, classes)

        self._init_weights()

    def _init_weights(self):
        """Initialise sub-modules, copying pretrained conv weights where possible.

        The counter ``i`` advances once per leaf module (conv, relu, pool, ...)
        in definition order, so it doubles as the index of the matching
        record in ``self.weights``.
        """
        i = 0
        for m in self.children():
            if not isinstance(m, torch.nn.Sequential):
                # Bare modules (pools, the final Linear) only advance the
                # counter; they keep PyTorch's default initialisation.
                i += 1
                continue
            for s in m.children():
                if isinstance(s, torch.nn.Conv2d):
                    if i == 0:
                        # The first conv may not match the pretrained layer
                        # (different input channels), so use Kaiming init.
                        init.kaiming_normal_(s.weight, a=0, mode='fan_in')
                    else:
                        kernels, bias = self.load_weightmat(i)
                        if (kernels.shape != s.weight.shape
                                or bias.shape != s.bias.shape):
                            raise ValueError(
                                "pretrained layer %d has shapes %s/%s, expected %s/%s"
                                % (i, tuple(kernels.shape), tuple(bias.shape),
                                   tuple(s.weight.shape), tuple(s.bias.shape)))
                        s.weight = torch.nn.Parameter(kernels)
                        s.bias = torch.nn.Parameter(bias)
                elif isinstance(s, torch.nn.Linear):
                    init.kaiming_normal_(s.weight, a=0, mode='fan_in')
                    init.constant_(s.bias, 0.0)
                elif 'BatchNorm' in type(s).__name__:
                    init.normal_(s.weight, 1.0, 0.02)
                    init.constant_(s.bias, 0.0)
                i += 1

    def forward(self, x):
        """Return per-class probabilities for a batch of images.

        Args:
            x: Input batch; dimension 0 is the batch dimension.

        Returns:
            Tensor of shape ``(batch, classes)`` whose rows sum to 1.

        Note:
            The output is already soft-maxed. ``torch.nn.CrossEntropyLoss``
            applies log-softmax itself and expects raw logits, so do not feed
            these probabilities to it unchanged.
        """
        batch = x.size(0)
        out = self.conv1_1(x)
        out = self.conv1_2(out)
        out = self.maxpool1(out)

        out = self.conv2_1(out)
        out = self.conv2_2(out)
        out = self.maxpool2(out)

        out = self.conv3_1(out)
        out = self.conv3_2(out)
        out = self.conv3_3(out)
        out = self.maxpool3(out)

        out = self.conv4_1(out)
        out = self.conv4_2(out)
        out = self.conv4_3(out)
        out = self.maxpool4(out)

        out = self.conv5_1(out)
        out = self.conv5_2(out)
        out = self.conv5_3(out)
        out = self.maxpool5(out)
        out = out.view(batch, -1)

        out = self.fc1(out)
        out = self.fc2(out)
        out = self.fc3(out)
        # Functional softmax over the class dimension; no ``self.softmax``
        # module is registered on this class.
        return torch.softmax(out, dim=1)

    def load_weightmat(self, i):
        """Return the pretrained ``(kernels, bias)`` tensors of layer ``i``.

        Record layout (for ``weights = data['layers'][0]``)::

            weights[i][0][0][0]    -> layer name, e.g. ['conv1_1']
            weights[i][0][0][1]    -> layer type, e.g. ['conv']
            weights[i][0][0][2][0] -> kernels, bias

        Args:
            i: Index of the layer record in ``self.weights``.

        Returns:
            ``kernels`` with the last two source axes (in/out channels) moved
            to the front as ``[out_channels, in_channels, k0, k1]`` and
            ``bias`` flattened to ``[out_channels]``, both float32 copies.

        Raises:
            ValueError: If record ``i`` is not a convolution layer.
        """
        entry = self.weights[i][0][0]
        layer_type = str(entry[1][0])
        if layer_type != 'conv':
            raise ValueError("weight error matching")
        kernels, bias = entry[2][0]
        # torch.tensor copies, so later training never writes into self.weights.
        kernels = torch.tensor(
            kernels.transpose((3, 2, 0, 1)), dtype=torch.float32).contiguous()
        # The stored bias is 2-D (row or column); flatten it either way.
        bias = torch.tensor(bias.reshape(-1), dtype=torch.float32)
        return kernels, bias

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。