之前写过一篇实现猫十二分类的文章,写出了大体的流程,但实际效果并不佳。本文采取微调预训练模型的方式,使准确率从0.3提升到了0.93。大体流程参考ResNet猫十二分类,本文只给出不同的地方。

代码框架借鉴自一篇写得比较漂亮的ResNet代码,在此基础上学习、改写。

迁移学习

迁移学习的两种方式

  1. 微调。从线上下载已训练完毕的模型,利用本地数据集进行参数的微调,更新的是所有参数
  2. 用作特征提取器。冻结预训练部分的参数,外加一层全连接,只训练全连接部分的参数

1. 数据

  • dataset定义
# Dataset
import os
import cv2
import torch
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
import numpy as np
import random
from PIL import Image


class myData(Dataset):
    """In-memory image dataset for the 12-class cat classification task.

    ``kind`` selects the split:

    * ``'train'`` / ``'val'`` -- samples are listed in
      ``./data/<kind>_split_list.txt``, one ``<relative path>&<label>`` entry
      per line; ``__getitem__`` returns ``(image_tensor, label_tensor)``.
    * ``'test'`` -- every file under ``data/test/`` is loaded and
      ``__getitem__`` returns only the image tensor.

    All images are decoded eagerly in ``__init__`` and kept as PIL images;
    the tensor transform is applied lazily in ``__getitem__``.
    """

    def __init__(self, kind):
        super(myData, self).__init__()
        self.mode = kind
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize((224, 224)),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

        if kind == 'test':
            # The test split carries no labels.
            self.imgs = self.load_origin_data()
        else:
            self.imgs, self.labels = self.load_origin_data()
            if kind == 'train':
                print('train size:')
                print(len(self.imgs))

    def __getitem__(self, index):
        sample = self.transform(self.imgs[index])
        if self.mode == 'test':
            return sample
        return sample, torch.tensor(self.labels[index])

    def __len__(self):
        return len(self.imgs)

    @staticmethod
    def _read_split_list(mode, shuffle):
        """Return the ``[path, label]`` string pairs listed for ``mode``."""
        filelist = './data/%s_split_list.txt' % mode
        with open(filelist) as flist:
            # Skip blank lines (e.g. a trailing newline at the end of the file).
            lines = [line.strip() for line in flist if line.strip()]
        if shuffle:
            np.random.shuffle(lines)
        return [line.split('&') for line in lines]

    @staticmethod
    def _decode_image(img_path):
        """Read ``img_path`` and return it as a PIL image.

        Raises ``ValueError`` when OpenCV cannot decode the file.
        """
        # cv2.imdecode expects the raw file bytes, so the buffer must be uint8
        # (reading it as float32 reinterprets/truncates the byte stream).
        raw = np.fromfile(img_path, dtype=np.uint8)
        decoded = cv2.imdecode(raw, cv2.IMREAD_COLOR)
        if decoded is None:
            raise ValueError('cannot decode image: %s' % img_path)
        # NOTE(review): OpenCV decodes to BGR while PIL interprets the array as
        # RGB, so channels reach the network swapped. Left unchanged so that
        # existing checkpoints keep seeing the same inputs -- confirm whether a
        # BGR->RGB conversion is wanted before retraining.
        return Image.fromarray(decoded)

    def load_origin_data(self):
        """Load the raw (untransformed) images for ``self.mode``.

        :return: ``(imgs, labels)`` for 'train'/'val', ``imgs`` for 'test'.
        :raises ValueError: if ``self.mode`` is not a known split, or a test
            image cannot be decoded.
        """
        data_dir = os.getcwd()

        if self.mode in ('train', 'val'):
            imgs = []
            labels = []
            pairs = self._read_split_list(self.mode, self.mode == 'train')
            for rel_path, label in pairs:
                img_path = os.path.join(data_dir, rel_path)
                try:
                    img = self._decode_image(img_path)
                    label_id = int(label)
                except (OSError, ValueError, cv2.error):
                    # Best effort: report the unreadable sample and move on.
                    print(img_path)
                    continue
                # Append both only after both succeeded so that images and
                # labels can never get out of step.
                imgs.append(img)
                labels.append(label_id)
            return imgs, labels

        if self.mode == 'test':
            # Sorted so the sample order is reproducible across runs/platforms.
            names = sorted(name.strip() for name in os.listdir('data/test/'))
            return [
                self._decode_image(os.path.join(data_dir, "data/test/", name))
                for name in names
            ]

        raise ValueError('unknown dataset mode: %r' % (self.mode,))

    def load_data(self, mode, shuffle, color_jitter, rotate):
        '''
        Load images through ``process_image`` (augmenting loader).

        ``process_image`` is not defined in this file; it is expected to be
        provided elsewhere in the project.

        :return : img, label
        img: (channel, w, h)
        '''
        data_dir = os.getcwd()

        if mode in ('train', 'val'):
            imgs = []
            labels = []
            for rel_path, label in self._read_split_list(mode, shuffle):
                img_path = os.path.join(data_dir, rel_path)
                try:
                    img, label = process_image((img_path, label), mode, color_jitter, rotate)
                except Exception:
                    # Best effort: skip samples that cannot be processed, but
                    # let KeyboardInterrupt/SystemExit propagate.
                    continue
                imgs.append(img)
                labels.append(label)
            return imgs, labels

        if mode == 'test':
            imgs = []
            for name in sorted(name.strip() for name in os.listdir('data/test/')):
                img_path = os.path.join(data_dir, "data/test/", name)
                imgs.append(process_image((img_path, 0), mode, color_jitter, rotate))
            return imgs

        raise ValueError('unknown dataset mode: %r' % (mode,))

# dataset
# img_datasets = {x: myData(x) for x in ['train', 'val']}
# dataset_sizes = {x: len(img_datasets[x]) for x in ['train', 'val']}
# test_datasets = {'test': myData('test')}
# test_size = {'test': len(test_datasets)}

首先数据部分有一些改动

# One dataset per split (built in train / val / test order) and its sample count.
img_datasets = dict((x, myData(x)) for x in ['train', 'val', 'test'])
dataset_sizes = {split: len(data) for split, data in img_datasets.items()}

# Datasets are ready; wrap each one in a DataLoader.
# Only the training loader shuffles and uses the configured batch size;
# validation and test are walked one sample at a time in a fixed order.
train_loader = DataLoader(img_datasets['train'], batch_size=batches, shuffle=True)
val_loader = DataLoader(img_datasets['val'], batch_size=1, shuffle=False)
test_loader = DataLoader(img_datasets['test'], batch_size=1, shuffle=False)

dataloaders = dict(train=train_loader, val=val_loader, test=test_loader)

2. 训练阶段

值得参考的tricks有:

  1. 训练过程中保存最优模型的参数:在每一个epoch中,若验证集上的准确率有所提升,则更新保存的参数
  2. 每个epoch中都包含训练和验证两个阶段。注意写法
# train
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

def train_model(model, criterion, optimizer, scheduler, num_epochs=25, loaders=None):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a training pass followed by a validation pass. The
    weights of the epoch with the highest validation accuracy are kept and
    restored into ``model`` before returning.

    :param model: network to train (already placed on its target device).
    :param criterion: loss function called as ``criterion(outputs, labels)``.
    :param optimizer: optimizer updating ``model``'s parameters.
    :param scheduler: learning-rate scheduler, stepped once per epoch.
    :param num_epochs: number of epochs to run.
    :param loaders: mapping with ``'train'`` and ``'val'`` iterables yielding
        ``(inputs, labels)`` batches; defaults to the module-level
        ``dataloaders``.
    :return: ``model`` with the best validation weights loaded.
    """
    import copy  # local import: this file's import block does not provide it

    if loaders is None:
        loaders = dataloaders

    # Feed batches to whatever device the model lives on, so inputs and
    # parameters can never end up on different devices.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    # Snapshot of the best weights seen so far, replaced whenever a later
    # epoch reaches a higher validation accuracy.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                # Evaluation mode: dropout is disabled and batch-norm layers
                # use their running statistics instead of batch statistics.
                model.eval()

            running_loss = 0.0
            running_corrects = 0
            seen = 0  # samples processed in this phase

            for inputs, labels in loaders[phase]:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)

                optimizer.zero_grad()

                # Only track gradients while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)  # predicted class per sample
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                batch_size = inputs.size(0)
                # criterion returns a per-batch value; weight it by the batch
                # size so the epoch figure is a per-sample average.
                running_loss += loss.item() * batch_size
                running_corrects += int(torch.sum(preds == labels).item())
                seen += batch_size

            if is_train:
                # Adjust the learning rate once per epoch, after the training pass.
                scheduler.step()

            # Guard against an empty loader instead of dividing by zero.
            denom = max(seen, 1)
            epoch_loss = running_loss / denom
            epoch_acc = running_corrects / denom

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    print('Best val Acc: {:.4f}'.format(best_acc))

    # Restore the best-performing weights before handing the model back.
    model.load_state_dict(best_model_wts)
    return model

3. 迁移学习部分

对模型所有层的所有参数都进行目标域的训练。

使用 pretrained=True 的方式得到预训练模型,将全连接层的输出维度改为类别数(12),然后对整个残差网络进行训练

# Transfer learning: fine-tune every layer of an ImageNet-pretrained ResNet-50.
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True`; kept as-is for compatibility with the version in use.
model_ft = models.resnet50(pretrained=True)
num_ftrs = model_ft.fc.in_features
# Replace the classification head so it outputs the 12 cat classes.
model_ft.fc = torch.nn.Linear(num_ftrs, 12)
model_ft = model_ft.to(device)

criterion = torch.nn.CrossEntropyLoss()

# All parameters are handed to the optimizer, i.e. the whole network is trained.
optimizer_ft = torch.optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Multiply the learning rate by 0.1 every 5 epochs.
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.1)

model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=15)

4. 保存模型

没啥说的

def vali(M, dataset, batch_size=32):
    """Evaluate ``M`` on ``dataset`` and return its accuracy in percent.

    :param M: model to evaluate; switched to eval mode by this call.
    :param dataset: map-style dataset yielding ``(input, label)`` pairs.
    :param batch_size: evaluation batch size.
    :return: accuracy as a float in ``[0, 100]`` (``0.0`` for an empty dataset).
    """
    M.eval()

    total = len(dataset)
    correct = 0
    if total:
        # Run on whatever device the model lives on.
        first_param = next(M.parameters(), None)
        run_device = first_param.device if first_param is not None else torch.device("cpu")

        # Iterate the dataset that was actually passed in, so the numerator
        # and the denominator always refer to the same samples.
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
        with torch.no_grad():
            for data, target in loader:
                data, target = data.to(run_device), target.to(run_device)
                _, pred_ids = torch.max(M(data), 1)
                correct += int(torch.sum(pred_ids == target).item())

    accuracy = 100.0 * correct / total if total else 0.0
    print("test accu: %.03f%%" % accuracy)
    return accuracy
# Encode the validation accuracy (percent * 100, truncated) in the file name,
# e.g. 93.43% -> "val_9343.pkl".
test_accu = int(vali(model_ft, img_datasets['val']) * 100)

model_name = 'val_{}.pkl'.format(test_accu)

# torch.save does not create missing directories, so make sure it exists.
os.makedirs("./myModels", exist_ok=True)
torch.save(model_ft.state_dict(), os.path.join("./myModels", model_name))

5. 读取模型

# Load the model.
# The saved state dict overwrites every weight, so there is no need to
# download the ImageNet-pretrained weights first.
model_ft = models.resnet50(pretrained=False)
num_ftrs = model_ft.fc.in_features
# The head must match the saved checkpoint: 12 output classes.
model_ft.fc = torch.nn.Linear(num_ftrs, 12)
model_ft = model_ft.to(device)

# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only host.
model_ft.load_state_dict(torch.load("./myModels/val_9343.pkl", map_location=device))
vali(model_ft, img_datasets['val'])

输出结果:

test accu: 93.433%