Contents
- 1 Overview
- 2 Loading Weights
- 3 Freezing the Feature-Extraction Layers for Training
- 4 Full Project Code
- 5 Acknowledgement Links
1 Overview
When applying weights pretrained on ImageNet to the CIFAR-10 dataset, two questions come up at the code level: the model's final layer has a different number of weights, so how do we load the checkpoint? And what if we also want to freeze part of the weights during training? Both are answered below.
2 Loading Weights
Take MobileNetV2 as an example: the ImageNet-2012 classification dataset has 1000 classes, so the network's last layer has 1000 output nodes, while CIFAR-10 has 10 classes, so the last layer has only 10.
Therefore only the weights of the earlier layers can be loaded; the final layer's weights are skipped.
This is implemented as follows:
# create model
net = MobileNetV2(num_classes=10)
# load pretrained weights
# download url: https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
model_weight_path = "./pretrained/mobilenet_v2-b0353104.pth"
assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)
# pre_weights: OrderedDict holding the ImageNet-pretrained weights
pre_weights = torch.load(model_weight_path, map_location='cpu')
# delete classifier weights
# pre_dict: a plain dict
# .numel(): returns the number of elements in a tensor
# keep a pretrained weight only when its element count matches the corresponding tensor in the network
pre_dict = {k: v for k, v in pre_weights.items() if net.state_dict()[k].numel() == v.numel()}
missing_keys, unexpected_keys = net.load_state_dict(pre_dict, strict=False)
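As a quick sanity check (my own addition, not part of the original code), you can print what was actually skipped; with strict=False, missing_keys should contain only the classifier parameters and unexpected_keys should be empty:
# sanity check (hypothetical addition): see which pretrained tensors were not loaded
skipped = [k for k, v in pre_weights.items() if net.state_dict()[k].numel() != v.numel()]
print("skipped pretrained keys:", skipped)   # expected: the classifier weight and bias
print("missing keys:", missing_keys)         # parameters left at their random initialization
print("unexpected keys:", unexpected_keys)   # expected to be empty here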
3 Freezing the Feature-Extraction Layers for Training
For a quick training run, you can freeze the weights of the earlier feature-extraction layers so that they do not take part in training. The implementation is as follows:
# freeze features weights
# freeze the feature-extraction layers and update only the final classification layer
# to train all weights, comment out the loop below, or unfreeze after a chosen number of epochs
# to unfreeze: for param in net.features.parameters():
#                  param.requires_grad = True
for param in net.features.parameters():
    param.requires_grad = False
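To confirm the freeze worked (a small check of my own, not in the original code), you can list the parameters that still require gradients; for this MobileNetV2 only the classifier parameters should remain trainable:
# hypothetical check: list the parameters that will actually be updated
trainable = [name for name, p in net.named_parameters() if p.requires_grad]
print("trainable parameters:", trainable)      # expected: only classifier.* entries
print("trainable tensor count:", len(trainable))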
4 Full Project Code
train.py
The code is as follows:
import os
import sys
import torch.backends.cudnn as cudnn
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from tqdm import tqdm
from model_v2 import MobileNetV2
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    batch_size = 16
    epochs = 5

    data_transform = {
        "train": transforms.Compose([transforms.RandomCrop(32, padding=4),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])]),
        "val": transforms.Compose([transforms.ToTensor(),
                                   transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])])}

    data_root = "./dataset"  # get data root path
    train_dataset = datasets.CIFAR10(root=data_root, train=True,
                                     transform=data_transform["train"])
    train_num = len(train_dataset)

    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers per process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.CIFAR10(root=data_root, train=False,
                                        transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                            val_num))

    # create model
    net = MobileNetV2(num_classes=10)

    # load pretrained weights
    # download url: https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
    model_weight_path = "./pretrained/mobilenet_v2-b0353104.pth"
    assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)
    # pre_weights: OrderedDict holding the ImageNet-pretrained weights
    pre_weights = torch.load(model_weight_path, map_location='cpu')

    # delete classifier weights
    # pre_dict: a plain dict
    # .numel(): returns the number of elements in a tensor
    # keep a pretrained weight only when its element count matches the corresponding tensor in the network
    pre_dict = {k: v for k, v in pre_weights.items() if net.state_dict()[k].numel() == v.numel()}
    missing_keys, unexpected_keys = net.load_state_dict(pre_dict, strict=False)

    # freeze features weights
    # freeze the feature-extraction layers and update only the final classification layer
    # to train all weights, comment out the loop below, or unfreeze after a chosen number of epochs
    # to unfreeze: for param in net.features.parameters():
    #                  param.requires_grad = True
    for param in net.features.parameters():
        param.requires_grad = False

    # one way to do multi-GPU training (DataParallel)
    # for details, see:
    if torch.cuda.is_available():
        net = torch.nn.DataParallel(net)  # wrapping a single GPU in DP may hurt the metrics slightly
        cudnn.benchmark = True
    net.to(device)

    # define loss function
    loss_function = nn.CrossEntropyLoss()

    # construct an optimizer
    params = [p for p in net.parameters() if p.requires_grad]
    # Does Adam still need a manual learning-rate schedule?
    # Opinions differ, since Adam already adapts the learning rate per parameter
    optimizer = optim.Adam(params, lr=0.0001)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.94)

    best_acc = 0.0
    save_path = './output/MobileNetV2.pth'
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # loss = loss_function(outputs, test_labels)
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

                val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1,
                                                           epochs)

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:
            best_acc = val_accurate
            if isinstance(net, nn.DataParallel):
                # multi-GPU (DataParallel) training: save the wrapped module's weights so they load into a plain model
                torch.save(net.module.state_dict(), save_path)
            else:
                # single-GPU / CPU training: save the model's weights directly
                torch.save(net.state_dict(), save_path)

        lr_scheduler.step()

    print('Finished Training')


if __name__ == '__main__':
    main()
Run it with the command below:
CUDA_VISIBLE_DEVICES=2,1 python3 train.py
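After training, the checkpoint at ./output/MobileNetV2.pth holds the plain (unwrapped) state_dict, so it can be loaded into a fresh MobileNetV2 for evaluation. Below is a minimal sketch; the test-image path and the single-image preprocessing are my own assumptions, not part of the project:
import torch
from PIL import Image
from torchvision import transforms
from model_v2 import MobileNetV2

net = MobileNetV2(num_classes=10)
net.load_state_dict(torch.load('./output/MobileNetV2.pth', map_location='cpu'))
net.eval()

# preprocessing must match the validation transform used during training
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                                     [0.2023, 0.1994, 0.2010])])
img = transform(Image.open('test.png').convert('RGB')).unsqueeze(0)  # hypothetical 32x32 test image
with torch.no_grad():
    pred = torch.argmax(net(img), dim=1)
print("predicted class index:", pred.item())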
The run output is as follows (screenshot omitted here).
Checking GPU usage during DataParallel training again, the main card uses somewhat more memory than the other.
Download the complete project:
Link: https://pan.baidu.com/s/1YknbF_3DAPa5uN818oq5QA
Extraction code: c5b5