I previously wrote a post implementing 12-category cat classification that laid out the overall pipeline, but the actual results were poor. This post instead fine-tunes a pretrained model, which raises the accuracy from 0.3 to 0.93. The overall flow follows the earlier ResNet cat-12 classification post; only the differences are covered here.
The code structure borrows from a nicely written ResNet implementation, studied here for reference.
Transfer learning
There are two ways to do transfer learning:
- Fine-tuning. Download a model that has already been trained and fine-tune it on the local dataset; all parameters are updated
- As a feature extractor. Add a fully connected layer on top and train only the parameters of that layer (a sketch of this variant follows the list)
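This post uses the fine-tuning approach below. For completeness, here is a minimal sketch of the feature-extractor variant (not from the original post): the pretrained backbone is frozen and only the new fully connected layer is trained.
# Feature-extractor variant (illustrative sketch)
import torch
from torch import nn
from torchvision import models

model_fe = models.resnet50(pretrained=True)
for param in model_fe.parameters():
    param.requires_grad = False              # freeze all pretrained weights
model_fe.fc = nn.Linear(model_fe.fc.in_features, 12)  # new layer; its parameters require grad by default
optimizer_fe = torch.optim.SGD(model_fe.fc.parameters(), lr=0.001, momentum=0.9)  # optimize only the fc parameters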
1. Data
- Dataset definition
# Dataset
import os
import cv2
import torch
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
import numpy as np
import random
from PIL import Image
class myData(Dataset):
    def __init__(self, kind):
        super(myData, self).__init__()
        self.mode = kind
        # self.transform = transforms.ToTensor()
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize((224, 224)),   # Resize applied after ToTensor operates on tensors (requires torchvision >= 0.8)
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        if kind == 'test':
            self.imgs = self.load_origin_data()
        elif kind == 'train':
            self.imgs, self.labels = self.load_origin_data()
            # self.imgs, self.labels = self.enlarge_dataset(kind, self.imgs, self.labels, 0.5)
            print('train size:')
            print(len(self.imgs))
        else:
            self.imgs, self.labels = self.load_origin_data()

    def __getitem__(self, index):
        if self.mode == 'test':
            sample = self.transform(self.imgs[index])
            return sample
        else:
            sample = self.transform(self.imgs[index])
            return sample, torch.tensor(self.labels[index])

    def __len__(self):
        return len(self.imgs)
    def load_origin_data(self):
        filelist = './data/%s_split_list.txt' % self.mode
        imgs = []
        labels = []
        data_dir = os.getcwd()
        if self.mode == 'train' or self.mode == 'val':
            with open(filelist) as flist:
                lines = [line.strip() for line in flist]
                if self.mode == 'train':
                    np.random.shuffle(lines)
                for line in lines:
                    img_path, label = line.split('&')
                    img_path = os.path.join(data_dir, img_path)
                    try:
                        # img, label = process_image((img_path, label), mode, color_jitter, rotate)
                        # decode from a raw uint8 byte buffer (handles non-ASCII paths); note cv2 returns BGR channel order
                        img = Image.fromarray(cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), 1))
                        imgs.append(img)
                        labels.append(int(label))
                    except:
                        print(img_path)
                        continue
            return imgs, labels
        elif self.mode == 'test':
            full_lines = os.listdir('data/test/')
            lines = [line.strip() for line in full_lines]
            for img_path in lines:
                img_path = os.path.join(data_dir, "data/test/", img_path)
                # try:
                #     img = process_image((img_path, label), mode, color_jitter, rotate)
                #     imgs.append(img)
                # except:
                #     print(img_path)
                # img = Image.open(img_path)
                img = Image.fromarray(cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), 1))
                imgs.append(img)
            return imgs
    def load_data(self, mode, shuffle, color_jitter, rotate):
        '''
        Legacy loader kept for reference: it depends on an external process_image() helper
        that is not shown in this post, and __init__ does not call it.
        :return : img, label
        img: (channel, w, h)
        '''
        filelist = './data/%s_split_list.txt' % mode
        imgs = []
        labels = []
        data_dir = os.getcwd()
        if mode == 'train' or mode == 'val':
            with open(filelist) as flist:
                lines = [line.strip() for line in flist]
                if shuffle:
                    np.random.shuffle(lines)
                for line in lines:
                    img_path, label = line.split('&')
                    img_path = os.path.join(data_dir, img_path)
                    try:
                        img, label = process_image((img_path, label), mode, color_jitter, rotate)
                        imgs.append(img)
                        labels.append(label)
                    except:
                        # print(img_path)
                        continue
            return imgs, labels
        elif mode == 'test':
            full_lines = os.listdir('data/test/')
            lines = [line.strip() for line in full_lines]
            for img_path in lines:
                img_path = os.path.join(data_dir, "data/test/", img_path)
                # try:
                #     img = process_image((img_path, label), mode, color_jitter, rotate)
                #     imgs.append(img)
                # except:
                #     print(img_path)
                img = process_image((img_path, 0), mode, color_jitter, rotate)
                imgs.append(img)
            return imgs
# dataset
# img_datasets = {x: myData(x) for x in ['train', 'val']}
# dataset_sizes = {x: len(img_datasets[x]) for x in ['train', 'val']}
# test_datasets = {'test': myData('test')}
# test_size = {'test': len(test_datasets)}
First, there are a few changes in the data part:
img_datasets = {x: myData(x) for x in ['train', 'val', 'test']}
dataset_sizes = {x: len(img_datasets[x]) for x in ['train', 'val', 'test']}
# Datasets are ready; now build the DataLoaders
batches = 32  # batch size; the original post does not show this value, 32 is an assumed placeholder
train_loader = DataLoader(
    dataset=img_datasets['train'],
    batch_size=batches,
    shuffle=True
)
val_loader = DataLoader(
    dataset=img_datasets['val'],
    batch_size=1,
    shuffle=False
)
test_loader = DataLoader(
    dataset=img_datasets['test'],
    batch_size=1,
    shuffle=False
)
dataloaders = {
    'train': train_loader,
    'val': val_loader,
    'test': test_loader
}
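As a quick sanity check (illustrative, not part of the original post), one batch can be pulled to confirm the shapes the model will see:
# sanity check: fetch one training batch and inspect its shape
sample_imgs, sample_labels = next(iter(dataloaders['train']))
print(sample_imgs.shape)    # expected: (batches, 3, 224, 224)
print(sample_labels.shape)  # expected: (batches,)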
2. Training
Tricks worth borrowing:
- Keep a copy of the best model weights during training; in each epoch, update that copy whenever the validation accuracy improves
- Every epoch contains both a training phase and a validation phase. Note how this is written
# train
import copy

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):  # arguments: model, loss criterion, optimizer, learning-rate scheduler, number of epochs
    best_model_wts = copy.deepcopy(model.state_dict())  # deep-copy the current weights (wts = weights); replaced whenever a better model shows up during training
    best_acc = 0.0  # best accuracy so far, used to decide whether to replace best_model_wts
    for epoch in range(num_epochs):  # start each epoch
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        for phase in ['train', 'val']:  # every epoch contains a training phase and a validation phase
            if phase == 'train':
                model.train()
            else:
                model.eval()
                # Unlike training, evaluation uses no batch-normalization updates or dropout, so the two phases must be handled differently.
                # Batch norm statistics are computed per mini-batch; at evaluation time each sample is processed on its own, so no mini-batch exists.
            running_loss = 0.0
            running_corrects = 0
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):  # context manager that enables/disables gradient tracking for everything inside the with-block; when False, new nodes are created without gradients
                    outputs = model(inputs)  # forward pass: compute outputs from inputs
                    _, preds = torch.max(outputs, 1)  # max along dimension 1 (per row) gives the predicted class
                    loss = criterion(outputs, labels)  # compare outputs against the labels
                    if phase == 'train':
                        loss.backward()  # backpropagation: compute the update for every weight and bias
                        optimizer.step()  # apply those updates to the model
                # same for train and val
                running_loss += loss.item() * inputs.size(0)  # accumulate the loss of every batch in the current epoch
                running_corrects += torch.sum(preds == labels.data)  # count correctly classified samples
            if phase == 'train':  # after finishing the training phase of this epoch, adjust the learning rate
                scheduler.step()  # decay the learning rate on its schedule during training
            epoch_loss = running_loss / dataset_sizes[phase]  # epoch loss = summed loss divided by the number of samples
            epoch_acc = running_corrects.double() / dataset_sizes[phase]  # epoch accuracy
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(  # print phase (train/val), loss and accuracy
                phase, epoch_loss, epoch_acc))
            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:  # in the val phase, if this epoch's accuracy beats the best so far
                best_acc = epoch_acc  # update the best accuracy
                best_model_wts = copy.deepcopy(model.state_dict())  # and keep a copy of the current (best) weights
    print('Best val Acc: {:4f}'.format(best_acc))
    # load the best weights back into the model
    model.load_state_dict(best_model_wts)
    return model
3. Transfer learning
All parameters of every layer are trained on the target domain.
Obtain the pretrained model with pretrained=True, change the output dimension of the fully connected layer, and then train the residual network.
# Transfer learning
from torch import nn
from torch.optim import lr_scheduler

model_ft = models.resnet50(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 12)  # replace the final fully connected layer with a 12-class output
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = torch.optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.1)  # decay the learning rate by 10x every 5 epochs
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=15)  # train the model
4. Saving the model
Nothing special here.
def vali(M, dataset):  # `dataset` is only used for its length; the batches themselves come from the global val_loader
    M.eval()
    with torch.no_grad():
        correct = 0
        for (data, target) in val_loader:
            data, target = data.to(device), target.to(device)
            pred = M(data)
            _, id = torch.max(pred, 1)
            correct += torch.sum(id == target.data)
        print("test accu: %.03f%%" % (100 * correct / len(dataset)))
        return (100 * correct / len(dataset)).item()

test_accu = int(vali(model_ft, img_datasets['val']) * 100)  # e.g. 93.433 -> 9343, used in the file name below
model_name = 'val_{}.pkl'.format(test_accu)
torch.save(model_ft.state_dict(), os.path.join("./myModels", model_name))  # the ./myModels directory must exist beforehand
5. Loading the model
# Load the model
model_ft = models.resnet50(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 12)  # remember to change the output dimension to 12
model_ft = model_ft.to(device)  # remember to move the model to the GPU so it matches the device of the saved parameters
model_ft.load_state_dict(torch.load("./myModels/val_9343.pkl"))
vali(model_ft, img_datasets['val'])
Output:
test accu: 93.433%
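The test_loader built in the data section is not used above. As an illustrative sketch (not part of the original post; the output format is an assumption), predictions for the unlabeled test images could be produced like this:
# illustrative: run the fine-tuned model over the unlabeled test set
model_ft.eval()
predictions = []
with torch.no_grad():
    for data in test_loader:       # test samples carry no labels (see myData.__getitem__)
        data = data.to(device)
        _, pred = torch.max(model_ft(data), 1)
        predictions.append(pred.item())  # batch_size is 1 for test_loader, so .item() is safe
print(predictions[:10])  # first few predicted class ids (0-11)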