PyTorch复现ResNet学习笔记

一篇简单的学习笔记,实现五类花分类,这里只介绍复现的一些细节

如果想了解更多有关网络的细节,请去看论文《Deep Residual Learning for Image Recognition》

简单说明下数据集:下载链接和这里用的数据都与AlexNet的那篇一样,所以不再说明

一、环境准备

可以去看之前的一篇博客,里面写得很详细了,并且推荐了一篇炮哥的环境搭建教程

  • Anaconda3(建议使用)
  • python=3.6/3.7/3.8
  • pycharm (IDE)
  • pytorch=1.11.0 (pip package)
  • torchvision=0.12.0 (pip package)
  • cudatoolkit=11.3

二、模型搭建、训练

1.整体框图

论文里写pytorch伪代码 pytorch论文复现_卷积

 

 

 其中残差块有两种结构

两层结构用于34层以下的

论文里写pytorch伪代码 pytorch论文复现_论文里写pytorch伪代码_02

 

 

 

三层结构,用于50,101,152层的

论文里写pytorch伪代码 pytorch论文复现_ide_03

 

 

 

虚线和实线的残差结构,实线:输出和输入维度是一样的,虚线:输入和输出维度不一样,需要进行维度匹配

2.model.py

网络整体结构代码

论文里写pytorch伪代码 pytorch论文复现_2d_04

论文里写pytorch伪代码 pytorch论文复现_2d_05

1 import torch.nn as nn
  2 import torch
  3 
  4 class BasicBlock(nn.Module):
  5     #对应18层和34层的残差块
  6     expansion = 1
  7     def __init__(self,in_channel,out_channel,stride=1,downsample=None,**kwargs):
  8         super(BasicBlock,self).__init__()
  9         self.conv1 = nn.Conv2d(in_channels=in_channel,out_channels=out_channel,
 10                                kernel_size=3,stride=stride,padding=1,bias=False)
 11         self.bn1 = nn.BatchNorm2d(out_channel)
 12         self.relu = nn.ReLU()
 13         self.conv2 = nn.Conv2d(in_channels=out_channel,out_channels=out_channel,
 14                                kernel_size=3,stride=1,padding=1,bias=False)
 15         self.bn2 = nn.BatchNorm2d(out_channel)
 16         self.downsample = downsample
 17 
 18     def forward(self,x):
 19         identity = x
 20         if self.downsample is not None:
 21             identity = self.downsample(x)
 22         out = self.conv1(x)
 23         out = self.bn1(out)
 24         out = self.relu(out)
 25         out = self.conv2(out)
 26         out = self.bn2(out)
 27 
 28         out +=identity#跨层连接
 29         out = self.relu(out)
 30 
 31         return out
 32 
 33 class Bottleneck(nn.Module):
 34     #适用于50,101,152层的
 35     """
 36     注意:原论文中,在虚线残差结构的主分支上,第一个1x1卷积层的步距是2,第二个3x3卷积层步距是1。
 37     但在pytorch官方实现过程中是第一个1x1卷积层的步距是1,第二个3x3卷积层步距是2,
 38     这么做的好处是能够在top1上提升大概0.5%的准确率。
 39     可参考Resnet v1.5 https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch
 40     """
 41     expansion = 4
 42     def __int__(self,in_channel,out_channel,stride=1,downsample=None):
 43         super(Bottleneck,self).__init__()
 44 
 45         self.conv1 = nn.Conv2d(in_channels=in_channel,out_channels=out_channel,
 46                                kernel_size=1,stride=1,bias=False)
 47         self.bn1 = nn.BatchNorm2d(out_channel)
 48         self.conv2 = nn.Conv2d(in_channels=out_channel,out_channels=out_channel,
 49                                kernel_size=3,stride=stride,bias=False,padding=1)
 50         self.bn2 = nn.BatchNorm2d(out_channel)
 51         self.conv3 = nn.Conv2d(in_channels=out_channel,out_channels=out_channel*self.expansion,
 52                                kernel_size=1,stride=1,bias=False)#扩展维度
 53         self.bn3 = nn.BatchNorm2d(out_channel*self.expansion)
 54         self.relu = nn.ReLU(inplace=True)#inplace = True ,会改变输入数据的值,节省反复申请与释放内存的空间与时间,只是将原来的地址传递,效率更好
 55         self.downsample = downsample
 56 
 57     def forward(self,x):
 58         identity = x #跨层连接的x
 59         if self.downsample is not None:
 60             identity = self.downsample(x)
 61 
 62         out = self.conv1(x)
 63         out = self.bn1(out)
 64         out = self.relu(out)
 65 
 66         out = self.conv2(out)
 67         out = self.bn2(out)
 68         out = self.relu(out)
 69 
 70         out = self.conv3(out)
 71         out = self.bn3(out)
 72 
 73         out += identity
 74         out = self.relu(out)
 75 
 76         return out
 77 
 78 class ResNet(nn.Module):
 79     def __init__(self,block,blocks_num,num_classes=1000,include_top=True):
 80         super(ResNet, self).__init__()
 81         self.include_top = include_top
 82         self.in_channel = 64
 83 
 84 
 85         self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
 86                                padding=3, bias=False)
 87         self.bn1 = nn.BatchNorm2d(self.in_channel)
 88         self.relu = nn.ReLU(inplace=True)
 89         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
 90         self.layer1 = self._make_layer(block, 64, blocks_num[0])
 91         self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
 92         self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
 93         self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)
 94         if self.include_top:
 95             self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
 96             self.fc = nn.Linear(512 * block.expansion, num_classes)
 97 
 98         for m in self.modules():
 99             '初始化权重'
100             if isinstance(m,nn.Conv2d):
101                 '随机矩阵显式创建权重'
102                 nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')
103 
104 
105     def _make_layer(self,block,channel,block_num,stride=1):
106         downsample = None
107         if stride != 1 or self.in_channel != channel * block.expansion:#表示层数是大于50的
108             '''
109             表示虚线的残差结构,需要进行维度扩展,一般是每一层的第一个残差结构
110             第一层(conv2_x)的虚线残差结构只需要扩展维度
111             而后面层的虚线残差结构还需要下采样将图像大小缩小一般
112             '''
113             downsample = nn.Sequential(
114                 nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
115                 nn.BatchNorm2d(channel * block.expansion))
116 
117         layers = []
118         '放入第一块残差结构'
119         layers.append(block(self.in_channel,
120                             channel,
121                             downsample=downsample,
122                             stride=stride))
123         self.in_channel = channel * block.expansion
124 
125         '放入剩余的残差块'
126         for _ in range(1, block_num):
127         #实线残差结构,不需要维度扩展
128             layers.append(block(self.in_channel,
129                                 channel))
130 
131         return nn.Sequential(*layers)
132 
133 
134     def forward(self, x):
135         x = self.conv1(x)
136         x = self.bn1(x)
137         x = self.relu(x)
138         x = self.maxpool(x)
139 
140         x = self.layer1(x)
141         x = self.layer2(x)
142         x = self.layer3(x)
143         x = self.layer4(x)
144 
145         if self.include_top:
146             x = self.avgpool(x)
147             x = torch.flatten(x, 1)
148             x = self.fc(x)
149 
150         return x
151 
def resnet34(num_classes=1000, include_top=True):
    """Build a ResNet-34 from ``BasicBlock`` (the block type for 18/34 layers)."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths,
                  num_classes=num_classes, include_top=include_top)
155 
156 
def resnet101(num_classes=1000, include_top=True):
    """Build a ResNet-101 from ``Bottleneck`` blocks.

    The 50- and 152-layer variants use the same block type; only the
    per-stage block counts in the list differ.
    """
    # Fix: this previously passed BasicBlock, which produced a network of
    # two-layer blocks instead of the bottleneck-based ResNet-101.
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)
160 
if __name__ == "__main__":
    # Smoke test. The adaptive average pool means no fixed input size is
    # required; 224x224 is used here.
    x = torch.rand([1, 3, 224, 224])
    model = resnet34(num_classes=5)
    y = model(x)
    print(y)

    # Count the model parameters. numel() gives the element count of each
    # tensor, and the total no longer shadows the builtin ``sum``.
    total_params = sum(param.numel() for param in model.parameters())
    # Uncomment to list every parameter tensor with its shape:
    # for name, param in model.named_parameters():
    #     print("{:30s} : {}".format(name, param.shape))
    print("total param num {}".format(total_params))  # total param num 21,287,237

model.py

写完后保存,运行可以检查是否报错

如果需要打印模型参数,将代码注释去掉即可,得到resnet34层的网络参数为21,287,237,相比vgg16来说还是少了很多

3.数据划分

这里与AlexNet用的一样

分好后的数据集

论文里写pytorch伪代码 pytorch论文复现_卷积_06

 运行下面代码将数据按一定比例,划分为训练集和验证集

论文里写pytorch伪代码 pytorch论文复现_2d_04

论文里写pytorch伪代码 pytorch论文复现_2d_05

1 import os
 2 from shutil import copy
 3 import random
 4 
 5 
 6 def mkfile(file):
 7     if not os.path.exists(file):
 8         os.makedirs(file)
 9 
10 
# Collect the class names: each sub-directory of the dataset root is one
# class. Non-directory entries (for example a license text shipped with the
# archive) are skipped; treating one as a class would make the later
# os.listdir(cla_path) call fail.
file_path = 'flower_photos'
flower_class = [cla for cla in os.listdir(file_path)
                if os.path.isdir(os.path.join(file_path, cla))]

# Create data/train and data/val, each with one sub-directory per class.
for subset in ('train', 'val'):
    mkfile('data/' + subset)
    for cla in flower_class:
        mkfile('data/' + subset + '/' + cla)

# Fraction of each class moved to the validation set (train : val = 9 : 1).
split_rate = 0.1

# Walk every class and copy each image into either the train or val tree.
for cla in flower_class:
    cla_path = file_path + '/' + cla + '/'  # directory of this class
    images = os.listdir(cla_path)  # names of all images of this class
    num = len(images)
    # Randomly pick the validation images; a set gives O(1) membership tests.
    eval_index = set(random.sample(images, k=int(num * split_rate)))
    for index, image in enumerate(images):
        subset = 'val' if image in eval_index else 'train'
        copy(cla_path + image, 'data/' + subset + '/' + cla)
        print("\r[{}] processing [{}/{}]".format(cla, index + 1, num), end="")  # processing bar
    print()

print("processing done!")

数据集划分的代码

4.train.py

这里训练我们同样使用迁移学习,来减少训练时间。

论文里写pytorch伪代码 pytorch论文复现_2d_04

论文里写pytorch伪代码 pytorch论文复现_2d_05

1 import os
  2 import sys
  3 import json
  4 import wandb
  5 import torch
  6 import torch.nn as nn
  7 import torch.optim as optim
  8 from torch.optim import lr_scheduler
  9 from torch.utils.data import DataLoader
 10 from torchvision import transforms,datasets
 11 from tqdm import tqdm
 12 import matplotlib.pyplot as plt
 13 from matplotlib.ticker import MaxNLocator
 14 
 15 from model import resnet34
 16 
 17 def main():
 18     # 如果显卡可用,则用显卡进行训练
 19     device = 'cuda' if torch.cuda.is_available() else 'cpu'
 20     print("using {} device".format(device))
 21     print(torch.cuda.get_device_name(0))
 22 
 23     data_transform = {
 24         "train":transforms.Compose([
 25                                    transforms.RandomResizedCrop(224),
 26                                    transforms.RandomHorizontalFlip(),
 27                                    transforms.ToTensor(),
 28                                    transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
 29         ]),
 30         "val":transforms.Compose([
 31                                  transforms.Resize(256),
 32                                  transforms.CenterCrop(224),
 33                                  transforms.ToTensor(),
 34                                  transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
 35         ])
 36     }
 37 
 38     #数据集路径
 39     ROOT_TRAIN = 'data/train'
 40     ROOT_TEST = 'data/val'
 41 
 42     batch_size = 16
 43     #加载数据集并处理
 44     train_dataset = datasets.ImageFolder(ROOT_TRAIN,transform=data_transform["train"])
 45     val_dataset = datasets.ImageFolder(ROOT_TEST,transform=data_transform["val"])
 46     # 划成一批批乱序数据集
 47     train_dataloader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
 48     val_dataloader = DataLoader(val_dataset,batch_size=batch_size,shuffle=True)
 49     #计算数据数量
 50     train_num = len(train_dataset)
 51     val_num = len(val_dataset)
 52     print("using {} images for training,{} images for validation.".format(train_num,val_num))
 53 
 54     #将{'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}键值对值反转,并保存
 55     flower_list = train_dataset.class_to_idx
 56     cla_dict = dict((val,key) for key,val in flower_list.items())
 57     #将键值对写入json文件
 58     json_str = json.dumps(cla_dict,indent=4)
 59     with open('class_indices.json','w')as json_file:
 60         json_file.write(json_str)#保存json文件(好处,方便转换为其它类型数据)用于预测用
 61 
 62     nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
 63     print('Using {} dataloader workers every process'.format(nw))
 64 
 65     model = resnet34()
 66     #加载预训练权重
 67     model_weight_path = "save_model/best_model.pth"
 68     assert os.path.exists(model_weight_path),"file {} does not exist.".format(model_weight_path)
 69     model.load_state_dict(torch.load(model_weight_path,map_location='cpu'))
 70 
 71     #change fc layer structure
 72     # in_channel = model.fc.in_features
 73     # model.fc = nn.Linear(in_channel,5)
 74     model.to(device)
 75 
 76     #损失函数
 77     loss_function = nn.CrossEntropyLoss()
 78     #优化器
 79     optimizer = optim.Adam(model.parameters(),lr=0.001)
 80     # 学习率每隔10epoch变为原来的0.1
 81     lr_s = lr_scheduler.StepLR(optimizer,step_size=10,gamma=0.5)
 82 
 83     #定义训练函数
 84     def train(dataloader,model,loss_fn,optimizer):
 85         model.train()
 86         loss,acc,n = 0.0,0.0,0
 87         train_bar = tqdm(dataloader,file=sys.stdout)
 88         for batch,(x,y) in enumerate(train_bar):
 89             #前向传播
 90             x,y = x.to(device),y.to(device)
 91             output = model(x)
 92             cur_loss = loss_fn(output,y)
 93             _,pred = torch.max(output,axis=-1)
 94             cur_acc = torch.sum(y==pred)/output.shape[0]
 95             #反向传播
 96             optimizer.zero_grad()#梯度清零
 97             cur_loss.backward()
 98             optimizer.step()
 99             loss += cur_loss.item()
100             acc += cur_acc.item()
101             n += 1
102             train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(i+1,epoch,cur_loss)
103         train_loss = loss / n
104         train_acc = acc / n
105 
106         print(f"train_loss:{train_loss}")
107         print(f"train_acc:{train_acc}")
108         return train_loss,train_acc
109 
110     #定义验证函数
111     def val(dataloader,model,loss_fn):
112         model.eval()
113         loss,acc,n = 0.0,0.0,0
114         val_bar = tqdm(dataloader,file=sys.stdout)
115         for batch,(x,y) in enumerate(val_bar):
116             #前向传播
117             x,y = x.to(device),y.to(device)
118             output = model(x)
119             cur_loss = loss_fn(output,y)
120             _,pred = torch.max(output,axis=-1)
121             cur_acc = torch.sum(y==pred)/output.shape[0]
122             loss += cur_loss.item()
123             acc += cur_acc.item()
124             n += 1
125             val_bar.desc = "val epoch[{}/{}] loss:{:.3f}".format(i+1,epoch,cur_loss)
126         val_loss = loss / n
127         val_acc = acc / n
128 
129         print(f"val_loss:{val_loss}")
130         print(f"val_acc:{val_acc}")
131         return val_loss,val_acc
132 
133     # 解决中文显示问题
134     plt.rcParams['font.sans-serif'] = ['SimHei']
135     plt.rcParams['axes.unicode_minus'] = False
136 
137     # 画图函数
138     def matplot_loss(train_loss, val_loss):
139         plt.figure()  # 声明一个新画布,这样两张图像的结果就不会出现重叠
140         plt.plot(train_loss, label='train_loss')  # 画图
141         plt.plot(val_loss, label='val_loss')
142         plt.legend(loc='best')  # 图例
143         plt.gca().xaxis.set_major_locator(MaxNLocator(integer=True))
144         plt.ylabel('loss', fontsize=12)
145         plt.xlabel('epoch', fontsize=12)
146         plt.title("训练集和验证集loss对比图")
147         folder = 'result'
148         if not os.path.exists(folder):
149             os.mkdir('result')
150         plt.savefig('result/loss.jpg')
151 
152     def matplot_acc(train_acc, val_acc):
153         plt.figure()  # 声明一个新画布,这样两张图像的结果就不会出现重叠
154         plt.plot(train_acc, label='train_acc')  # 画图
155         plt.plot(val_acc, label='val_acc')
156         plt.legend(loc='best')  # 图例
157         plt.gca().xaxis.set_major_locator(MaxNLocator(integer=True))
158         plt.ylabel('acc', fontsize=12)
159         plt.xlabel('epoch', fontsize=12)
160         plt.title("训练集和验证集acc对比图")
161         plt.savefig('result/acc.jpg')
162 
163     #开始训练
164     train_loss_list = []
165     val_loss_list = []
166     train_acc_list = []
167     val_acc_list = []
168 
169     epoch = 5
170     max_acc = 0
171 
172     wandb.init(project='ResNet',name='resnet34.1')
173 
174     for i in range(epoch):
175         lr_s.step()
176         train_loss,train_acc=train(train_dataloader,model,loss_function,optimizer)
177         wandb.log({'train_loss': train_loss, 'train_acc': train_acc})
178         val_loss,val_acc=val(val_dataloader,model,loss_function)
179         wandb.log({'val_loss': val_loss, 'val_acc': val_acc})
180 
181         train_loss_list.append(train_loss)
182         val_loss_list.append(val_loss)
183         train_acc_list.append(train_acc)
184         val_acc_list.append(val_acc)
185         #保存最好的模型权重
186         if val_acc > max_acc:
187             folder = 'save_model'
188             if not os.path.exists(folder):
189                 os.mkdir('save_model')
190             max_acc = val_acc
191             print('save best model')
192             torch.save(model.state_dict(), "save_model/best_model.pth")
193         # 保存最后一轮
194         # if i == epoch - 1:
195         #     torch.save(model.state_dict(), 'save_model/last_model.pth')
196 
197     print("Finished Training")
198     #画图
199     # matplot_loss(train_loss_list,val_loss_list)
200     # matplot_acc(train_acc_list,val_acc_list)
201 
202 if __name__=='__main__':
203     main()

train.py

训练结束后可以得到训练集和验证集的loss,acc对比图

论文里写pytorch伪代码 pytorch论文复现_卷积_11

 

 

论文里写pytorch伪代码 pytorch论文复现_ide_12

 

 简单的评估下:同样在resnet34的训练中可以看出,迁移学习的强大

总结

相比VGG-16,resnet可以训练层数更深的网络,并且参数少得多

自己敲一下代码,会学到很多不懂的东西

最后,多看,多学,多试,总有一天你会成为大佬!