A few reflections on taking this course. My knowledge of classic segmentation networks like FCN, PSPNet, and the DeepLab series used to be purely theoretical; I had no idea how to implement them in code. Just as I was agonizing over how to reproduce one, I happened upon this course, and wow, it really made my day: it solved the big problem of my first network reproduction. Through the live coding of Teacher Zhu, "the god of hand-typed code", I learned how to build a deep-learning network from scratch with the Paddle framework. Having previously studied a certain "-Flow" framework (friendly fire, confirmed), I find Paddle quite pleasant to use; my coding skills aren't strong, but with the documentation I can still get something written.


Basic framework setup workflow

  • A quick look at the basic network-building workflow with Paddle's dynamic graph:
  • Paddle's data loading:
  • Data augmentation for training:
  • Model training:
  • Model loss:
  • Summary:


A quick look at the basic network-building workflow with Paddle's dynamic graph:

1. Set up the model's execution environment (place).
2. Create the dynamic-graph context.
3. Instantiate the model.
4. Feed in the data (mind the format conversion).
5. Run the model to compute the output.
6. Read out the output.

import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable  # converts numpy arrays to Paddle Variables
from paddle.fluid.dygraph import Pool2D  # pooling layer
from paddle.fluid.dygraph import Conv2D  # convolution layer
import numpy as np
np.set_printoptions(precision=2)


class BasicModel(fluid.dygraph.Layer):
    # BasicModel is our network. It contains:
    # 1. pool:     2x2 max pool with stride 2
    # 2. upsample: interpolate back to the input's HxW
    # 3. conv:     1x1 conv that takes an RGB image and outputs num_classes channels,
    #              leaving the feature-map HxW unchanged
    #
    # The model takes a random input tensor of shape (1, 3, 8, 8) and outputs a
    # tensor with the same HxW as the input but C = num_classes, which is then
    # printed out in numpy format.

    def __init__(self, num_classes=59):
        super(BasicModel, self).__init__()
        # each attribute is a Paddle op
        self.pool = Pool2D(pool_size=2, pool_stride=2)  # max pooling (Pool2D defaults to pool_type='max')
        self.conv = Conv2D(num_channels=3, num_filters=num_classes, filter_size=1)  # 1x1 convolution

    def forward(self, inputs):  # inputs has shape (n, c, h, w)
        x = self.pool(inputs)  # max pooling halves the spatial size
        x = fluid.layers.interpolate(x, out_shape=(inputs.shape[2], inputs.shape[3]))  # upsample back to (h, w)
        x = self.conv(x)  # convolution maps the channels to num_classes
        return x

def main():
    place = paddle.fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        model = BasicModel(num_classes=59)
        model.eval()
        input_data = np.random.rand(1,3,8,8).astype(np.float32)  # a random np.array as input
        print('Input data shape: ', input_data.shape)
        input_data = to_variable(input_data)  # convert the np.array into a Paddle Variable
        output_data = model(input_data)  # run the model to compute the output
        output_data = output_data.numpy()  # convert the output tensor back to numpy
        print('Output data shape: ', output_data.shape)

if __name__ == "__main__":
    main()
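
With num_classes=59, the 8x8 input is pooled down to 4x4, upsampled back to 8x8, and mapped to 59 channels by the 1x1 convolution, so the script should print an output shape of (1, 59, 8, 8).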

Paddle's data loading:

The framework already provides the data-loading machinery; we only need to define our own BasicDataLoader class.

import os
import random
import numpy as np
import cv2
import paddle.fluid as fluid

# data augmentation class
class Transform(object):
    def __init__(self, size=256):
        self.size = size

    def __call__(self, input, label):
        # resize the input image and the label; use nearest-neighbor for the label
        input = cv2.resize(input, (self.size, self.size), interpolation=cv2.INTER_LINEAR)
        label = cv2.resize(label, (self.size, self.size), interpolation=cv2.INTER_NEAREST)

        return input,label

# basic data loading class
class BasicDataLoader(object):
    def __init__(self,
                 image_folder,
                 image_list_file,
                 transform=None,
                 shuffle=True):
        self.image_folder = image_folder
        self.image_list_file = image_list_file
        self.transform = transform
        self.shuffle = shuffle
        
        self.data_list = self.read_list()  # data_list holds the (image, label) path pairs returned by read_list()


    def read_list(self):
        data_list = []
        with open(self.image_list_file) as infile:
            for line in infile:
                data_path = os.path.join(self.image_folder,line.split()[0])
                label_path = os.path.join(self.image_folder,line.split()[1])
                data_list.append((data_path,label_path))

        if self.shuffle:
            random.shuffle(data_list)
        return data_list


    def preprocess(self, data, label):
        h,w,c = data.shape
        h_gt,w_gt = label.shape

        assert h == h_gt, "Error: image and label heights do not match!"
        assert w == w_gt, "Error: image and label widths do not match!"

        if self.transform:
            data,label = self.transform(data,label)

        label = label[:,:,np.newaxis]  # add a channel dimension to the label

        return data,label


    # magic-method overrides
    def __len__(self):
        return len(self.data_list)


    def __call__(self):
        for data_path,label_path in self.data_list:
            data = cv2.imread(data_path,cv2.IMREAD_COLOR)
            data = cv2.cvtColor(data,cv2.COLOR_BGR2RGB)
            label = cv2.imread(label_path,cv2.IMREAD_GRAYSCALE)
            data,label = self.preprocess(data,label)
            yield data,label



def main():
    batch_size = 5
    place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        transform = Transform(256)
        # create our own BasicDataLoader instance
        basic_dataloader = BasicDataLoader(
                image_folder="./work/dummy_data",
                image_list_file="./work/dummy_data/list.txt",
                transform=transform,
                shuffle=True
                )

        # create Paddle's fluid.io.DataLoader instance
        dataloader = fluid.io.DataLoader.from_generator(capacity=1, use_multiprocess=False)

        # hook our sample generator into the fluid dataloader
        dataloader.set_sample_generator(basic_dataloader,
                                        batch_size=batch_size,
                                        places=place)


        num_epoch = 2
        for epoch in range(1, num_epoch+1):
            print(f'Epoch [{epoch}/{num_epoch}]:')
            for idx, (data, label) in enumerate(dataloader):
                print(f'Iter {idx}, Data shape: {data.shape}, Label shape: {label.shape}')

if __name__ == "__main__":
    main()
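
One thing worth noting: read_list() expects each line of list.txt to hold an image path and a label path separated by whitespace, both relative to image_folder. With the dummy-data layout used above, a line would look roughly like this (the file name is just for illustration):

JPEGImages/2008_000064.jpg GroundTruth_trainval_png/2008_000064.png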

Data augmentation for training:

When training samples are scarce, data augmentation (translation, cropping, scaling, flipping, normalization, and so on) helps; it is mostly classic OpenCV-based digital image processing applied as preprocessing.

import cv2
import numpy as np

class Compose(object):  # chains the individual transforms together
    def __init__(self, transforms):
        self.transforms = transforms  # the collection of transform ops
    def __call__(self, image, label=None):
        for t in self.transforms:
            image, label = t(image, label)
        return image, label


class Normalize(object):  # standardize pixel values
    def __init__(self, mean_val, std_val, val_scale=1):
        # set val_scale = 1 if mean and std are in range (0, 1)
        # set val_scale to another value if mean and std are in range (0, 255)
        self.mean = np.array(mean_val, dtype=np.float32)  # stored as np.array
        self.std = np.array(std_val, dtype=np.float32)
        self.val_scale = 1/255.0 if val_scale==1 else 1
    def __call__(self, image, label=None):
        image = image.astype(np.float32)
        image = image * self.val_scale  # bring pixel values into (0, 1) if needed
        image = image - self.mean
        image = image * (1 / self.std)
        return image, label


class ConvertDataType(object):  # convert data types
    def __call__(self, image, label=None):
        if label is not None:
            label = label.astype(np.int64)  # labels become int64
        return image.astype(np.float32), label  # images become float32


class Pad(object):
    def __init__(self, size, ignore_label=255, mean_val=0, val_scale=1):
        # set val_scale to 1 if mean_val is in range (0, 1)
        # set val_scale to 255 if mean_val is in range (0, 255) 
        factor = 255 if val_scale == 1 else 1

        self.size = size
        self.ignore_label = ignore_label
        self.mean_val=mean_val
        # from 0-1 to 0-255
        if isinstance(self.mean_val, (tuple,list)):
            self.mean_val = [int(x* factor) for x in self.mean_val]
        else:
            self.mean_val = int(self.mean_val * factor)


    def __call__(self, image, label=None):
        h, w, c = image.shape
        pad_h = max(self.size - h, 0)
        pad_w = max(self.size - w, 0)

        pad_h_half = int(pad_h / 2)
        pad_w_half = int(pad_w / 2)

        if pad_h > 0 or pad_w > 0:

            image = cv2.copyMakeBorder(image,
                                       top=pad_h_half,
                                       left=pad_w_half,
                                       bottom=pad_h - pad_h_half,
                                       right=pad_w - pad_w_half,
                                       borderType=cv2.BORDER_CONSTANT,
                                       value=self.mean_val)
            if label is not None:
                label = cv2.copyMakeBorder(label,
                                           top=pad_h_half,
                                           left=pad_w_half,
                                           bottom=pad_h - pad_h_half,
                                           right=pad_w - pad_w_half,
                                           borderType=cv2.BORDER_CONSTANT,
                                           value=self.ignore_label)
        return image, label


class CenterCrop(object):  # crop a region of the given size, centered on the image center
    def __init__(self,crop_size):
        self.crop_h = crop_size
        self.crop_w = crop_size

    def __call__(self,image,label=None):  # image is HWC
        h,w,c = image.shape
        h_start = (h - self.crop_h) // 2
        w_start = (w - self.crop_w) // 2

        image = image[h_start:h_start + self.crop_h,w_start:w_start + self.crop_w,:]

        if label is not None:
            label = label[h_start:h_start + self.crop_h,w_start:w_start + self.crop_w]
        
        return image,label

class Resize(object):
    def __init__(self,size):  # resize directly to the given square size
        self.size = size

    def __call__(self,image,label = None):
        image = cv2.resize(image,(self.size,self.size),interpolation=cv2.INTER_LINEAR)

        if label is not None:
            label = cv2.resize(label,(self.size,self.size),interpolation=cv2.INTER_NEAREST)

        return image,label

class RandomFlip(object):
    def __call__(self,image,label=None):
        prob_of_flip = np.random.rand()  # random draw; flip horizontally with probability 0.5
        
        if prob_of_flip > 0.5:
            image = cv2.flip(image,1)
            if label is not None:
                label = cv2.flip(label,1)

        return image,label

class RandomCrop(object):  # randomly crop a sub-image
    def __init__(self,crop_size):
        self.crop_size = crop_size

    def __call__(self,image,label=None):
        h,w,c = image.shape
        top = np.random.uniform(0, h - self.crop_size)
        left = np.random.uniform(0, w - self.crop_size)

        assert top >= 0,"Error:crop_size > image_height!"
        assert left >= 0,"Error:crop_size > image_width!"

        rect = np.array([
            int(left),int(top),int(left + self.crop_size),int(top + self.crop_size)
        ])

        image = image[rect[1]:rect[3],rect[0]:rect[2],:]
        if label is not None:
            label = label[rect[1]:rect[3],rect[0]:rect[2]]

        return image,label

class Scale(object):
    def __call__(self,image,label=None,scale = 1.0):
        if not isinstance(scale,(list,tuple)):
            scale = [scale,scale]
        h,w,c = image.shape
        image = cv2.resize(image,(int(w * scale[0]),int(h * scale[1])),interpolation = cv2.INTER_LINEAR)

        if label is not None:
            label = cv2.resize(label,(int(w * scale[0]),int(h * scale[1])),interpolation=cv2.INTER_NEAREST)

        return image,label

class RandomScale(object):  # randomly enlarge or shrink by some factor
    def __init__(self,min_scale = 0.5,max_scale = 2.0,step = 0.25):
        self.min_scale = min_scale
        self.max_scale = max_scale
        self.step = step
        self.scale = Scale()
    
    def __call__(self,image,label=None):
        
        if self.step==0:
            self.random_scale = np.random.uniform(self.min_scale,self.max_scale)  # a continuous random draw
        else:
            num_steps = int((self.max_scale - self.min_scale) / self.step + 1)
            scale_factors = np.linspace(self.min_scale,self.max_scale,num_steps)

            np.random.shuffle(scale_factors)
            self.random_scale = scale_factors[0]

        image,label = self.scale(image,label,self.random_scale)

        return image, label

        
def main():
    image = cv2.imread('./work/dummy_data/JPEGImages/2008_000064.jpg')
    label = cv2.imread('./work/dummy_data/GroundTruth_trainval_png/2008_000064.png')

    crop_size = 256
    # apply each transform on its own below, so the effect of each can be inspected one by one
    transforms = [RandomScale(),
                  Scale(),
                  RandomFlip(),
                  Resize(100),
                  Pad(crop_size,mean_val=[0.485,0.456,0.406]),
                  RandomCrop(crop_size),
                  CenterCrop(crop_size),
                  ConvertDataType(),
                  Normalize(0,1),
                  ]

    for i in range(len(transforms)):
        # apply a single transform
        augment = Compose([transforms[i]])
        newimage,newlabel = augment(image,label)
        # save the transformed image and label for visual inspection
        cv2.imwrite('new_image' + str(i) + '.png',newimage)
        cv2.imwrite('new_label' + str(i) + '.png',newlabel)

if __name__ == "__main__":
    main()
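
One more note: the training script in the next section imports a TrainAugmentation class from basic_data_preprocessing, which these notes never show. As a minimal sketch (my assumption; the course's actual version may differ), it could simply chain the transforms above with Compose:

# A hypothetical TrainAugmentation: random scale -> pad up to the crop size ->
# random crop -> random flip -> type conversion -> normalization.
# The ImageNet mean/std values are a common default, not taken from the course code.
class TrainAugmentation(object):
    def __init__(self, image_size=256,
                 mean_val=[0.485, 0.456, 0.406], std_val=[0.229, 0.224, 0.225]):
        self.augment = Compose([RandomScale(),
                                Pad(image_size, mean_val=mean_val),
                                RandomCrop(image_size),
                                RandomFlip(),
                                ConvertDataType(),
                                Normalize(mean_val, std_val)])

    def __call__(self, image, label):
        return self.augment(image, label)

Pad sits before RandomCrop because RandomScale may shrink the image below the crop size.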

Model training:

The training process is easy to understand: feed the data into the model, compute the loss in the forward pass, back-propagate the gradients, and complete one optimization step.

import os
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
import numpy as np
import argparse
from utils import AverageMeter
from basic_model import BasicModel
from basic_dataloader import BasicDataLoader
from basic_seg_loss import Basic_SegLoss
from basic_data_preprocessing import TrainAugmentation


parser = argparse.ArgumentParser()
parser.add_argument('--net', type=str, default='basic')
parser.add_argument('--lr', type=float, default=0.001)
parser.add_argument('--num_epochs', type=int, default=10)
parser.add_argument('--batch_size', type=int, default=4)
parser.add_argument('--image_folder', type=str, default='./work/dummy_data')
parser.add_argument('--image_list_file', type=str, default='./work/dummy_data/list.txt')
parser.add_argument('--checkpoint_folder', type=str, default='./work/output')
parser.add_argument('--save_freq', type=int, default=2)


args = parser.parse_args()

def train(dataloader, model, criterion, optimizer, epoch, total_batch):
    model.train()
    train_loss_meter = AverageMeter()
    for batch_id, data in enumerate(dataloader):
        image = data[0]  # images
        label = data[1]  # labels

        image = fluid.layers.transpose(image,(0,3,1,2))  # NHWC -> NCHW

        pred = model(image)

        loss = criterion(pred,label)

        loss.backward()
        optimizer.minimize(loss)
        model.clear_gradients()

        n = image.shape[0]
        train_loss_meter.update(loss.numpy()[0], n)
        print(f"Epoch[{epoch:03d}/{args.num_epochs:03d}], " +
                f"Step[{batch_id:04d}/{total_batch:04d}], " +
                f"Average Loss: {train_loss_meter.avg:4f}")

    return train_loss_meter.avg



def main():
    # Step 0: preparation
    place = paddle.fluid.CUDAPlace(0)  # train on the GPU; use fluid.CPUPlace() to train on the CPU
    with fluid.dygraph.guard(place):
        # Step 1: Define training dataloader
        transform = TrainAugmentation(256)  # data augmentation
        dataloader = BasicDataLoader(  # our custom data loader
                image_folder = args.image_folder,
                image_list_file = args.image_list_file,
                transform=transform,
                shuffle=True
                )
        
        train_dataloader = fluid.io.DataLoader.from_generator(capacity=1,use_multiprocess=False)  # Paddle's data loader

        # hook our sample generator into the fluid dataloader
        train_dataloader.set_sample_generator(dataloader,
                                        batch_size = args.batch_size,
                                        places=place)

        total_batch = int(len(dataloader) / args.batch_size)  # total samples / batch size = iterations per epoch

        # Step 2: Create model
        if args.net == "basic":
            model = BasicModel(num_classes=59)  # 59 classes, consistent with the rest of these notes
        else:
            raise NotImplementedError(f"args.net: {args.net} is not Supported!")

        # Step 3: Define criterion and optimizer
        criterion = Basic_SegLoss

        # create optimizer
        optimizer = AdamOptimizer(learning_rate = args.lr,parameter_list = model.parameters())
        
        # Step 4: Training
        for epoch in range(1, args.num_epochs+1):
            train_loss = train(train_dataloader,
                               model,
                               criterion,
                               optimizer,
                               epoch,
                               total_batch)
            print(f"----- Epoch[{epoch}/{args.num_epochs}] Train Loss: {train_loss:.4f}")

            if epoch % args.save_freq == 0 or epoch == args.num_epochs:
                model_path = os.path.join(args.checkpoint_folder, f"{args.net}-Epoch-{epoch}-Loss-{train_loss}")

                # save model and optimizer states
                model_dict = model.state_dict()
                fluid.save_dygraph(model_dict,model_path)
                optimizer_dict = optimizer.state_dict()
                fluid.save_dygraph(optimizer_dict,model_path)


                print(f'----- Save model: {model_path}.pdparams')
                print(f'----- Save optimizer: {model_path}.pdopt')



if __name__ == "__main__":
    main()
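
Two small notes on the script above. First, the AverageMeter imported from utils is not shown anywhere in these notes; presumably it is just a running-average helper, along these lines (a sketch of my assumption, not the course's actual utils code):

class AverageMeter(object):
    # keeps a running sum and count so that .avg is the mean over all updates
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.cnt = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val        # most recent value
        self.sum += val * n   # weighted running sum
        self.cnt += n
        self.avg = self.sum / self.cnt

Second, thanks to argparse the script runs with its defaults as python train.py, or with overrides such as python train.py --net basic --lr 0.001 --num_epochs 10 --batch_size 4.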

Model loss:

It is just the basic cross-entropy loss; an auxiliary loss can be added on top (like the aux loss in PSPNet).

import paddle
import paddle.fluid as fluid
import numpy as np
import cv2

eps = 1e-8

def Basic_SegLoss(preds, labels, ignore_index=255):
    n, c, h, w = preds.shape

    # criterion: softmax followed by cross-entropy
    criterion = fluid.layers.softmax_with_cross_entropy
    
    # transpose preds from NxCxHxW to NxHxWxC so they match the label layout
    preds = fluid.layers.transpose(preds,(0,2,3,1))

    mask = labels!=ignore_index  # mask out pixels marked with the ignore label
    mask = fluid.layers.cast(mask, 'float32')

    # compute the per-pixel loss, then average over the valid (non-ignored) pixels
    loss = criterion(logits=preds,label=labels)
    
    loss = loss * mask
    avg_loss = fluid.layers.mean(loss) / (fluid.layers.mean(mask) + eps)

    return avg_loss

def main():
    label = cv2.imread('work/dummy_data/GroundTruth_trainval_png/2008_000026.png')  # ground-truth label
    label = cv2.cvtColor(label, cv2.COLOR_BGR2GRAY).astype(np.int64)  # convert the label to grayscale
    pred = np.random.uniform(0, 1, (1, 59, label.shape[0], label.shape[1])).astype(np.float32)  # fake prediction of shape (n, c, h, w)
    label = label[:,:,np.newaxis]  # add a channel dimension
    label = label[np.newaxis, :, :, :]  # add a batch dimension -> (1, h, w, 1)

    with fluid.dygraph.guard(fluid.CPUPlace()):
        pred = fluid.dygraph.to_variable(pred)  # convert to Paddle Variables
        label = fluid.dygraph.to_variable(label)
        loss = Basic_SegLoss(pred, label)  # compute the segmentation loss
        print(loss)

if __name__ == "__main__":
    main()
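
As for the auxiliary loss mentioned at the top of this section: PSPNet attaches a second classifier head to an intermediate feature map and adds its down-weighted loss to the main loss (the paper uses a weight of 0.4). A minimal sketch of the combination, assuming a hypothetical model that returns both the main and the auxiliary prediction:

def Aux_SegLoss(main_pred, aux_pred, labels, aux_weight=0.4, ignore_index=255):
    # total loss = main loss + aux_weight * auxiliary loss
    main_loss = Basic_SegLoss(main_pred, labels, ignore_index)
    aux_loss = Basic_SegLoss(aux_pred, labels, ignore_index)
    return main_loss + aux_weight * aux_loss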

Summary:

In principle, the workflow above can be used to build and train any model in Paddle. Hahaha, very happy, and I am looking forward to seeing my own predictions (sadly this part is still my weak spot; staring at a trained model without knowing how to run inference is painful. Could some expert teach me?)
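
For future reference, here is a minimal inference sketch following the same dygraph conventions as above (assumptions on my part: the checkpoint prefix matches one saved by the training script, and in practice the image should be resized and normalized exactly as during training). The per-pixel class is just the argmax over the channel dimension:

import cv2
import numpy as np
import paddle.fluid as fluid
from basic_model import BasicModel

def predict(image_path, model_path):  # model_path: checkpoint prefix, without the .pdparams suffix
    with fluid.dygraph.guard(fluid.CPUPlace()):
        model = BasicModel(num_classes=59)
        param_dict, _ = fluid.load_dygraph(model_path)  # load the saved weights
        model.load_dict(param_dict)
        model.eval()

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image = image[np.newaxis, :, :, :]  # add a batch dimension: (1, h, w, c)
        image = fluid.dygraph.to_variable(image)
        image = fluid.layers.transpose(image, (0, 3, 1, 2))  # NHWC -> NCHW

        pred = model(image)  # shape (1, num_classes, h, w)
        pred = np.argmax(pred.numpy(), axis=1)[0]  # per-pixel class map, shape (h, w)
        return pred.astype(np.uint8)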