A few thoughts on taking this course. My knowledge of classic segmentation networks like FCN, PSPNet, and the DeepLab series had been purely theoretical; I couldn't reproduce them in code. Just as I was agonizing over how to implement one for the first time, I happened upon this course, which, honestly, was a delight: it solved the big problem of my first network reproduction. Through the live coding of Mr. Zhu, "the god of hand-typed code," I learned how to build a deep learning network from scratch with the Paddle framework. Having previously studied a certain xx-Flow (friendly fire, confirmed), I found Paddle quite pleasant to use. My coding skills aren't strong, but with the documentation I can still get something working.
Basic network-building workflow
- A quick look at the basic network-building workflow with Paddle's dynamic graph
- Paddle's data loading
- Data augmentation for training
- Model training
- Model loss
- Summary
A quick look at the basic network-building workflow with Paddle's dynamic graph:
1. Set up the model's execution environment.
2. Create the dynamic-graph (dygraph) context.
3. Instantiate the model.
4. Feed in the data (mind the format conversion).
5. Run the model to compute the output.
6. Read out the output.
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable  # converts numpy arrays to Paddle variables
from paddle.fluid.dygraph import Pool2D
from paddle.fluid.dygraph import Conv2D
import numpy as np

np.set_printoptions(precision=2)

class BasicModel(fluid.dygraph.Layer):
    # BasicModel is our network. It contains:
    # 1. pool: 2x2 max pool with stride 2
    # 2. conv: 1x1 kernel, takes an RGB image as input and outputs num_classes channels
    # 3. upsample: interpolate back to the input size
    #
    # The model takes a random input tensor of shape (1, 3, 8, 8) and outputs
    # a tensor with the same HxW as the input, but C = num_classes.
    def __init__(self, num_classes=59):
        super(BasicModel, self).__init__()
        # each attribute wraps a Paddle op
        self.pool = Pool2D(pool_size=2, pool_stride=2)  # max pooling (the Pool2D default)
        self.conv = Conv2D(num_channels=3, num_filters=num_classes, filter_size=1)  # 1x1 convolution

    def forward(self, inputs):  # inputs has shape (n, c, h, w)
        x = self.pool(inputs)  # max pooling halves the spatial size
        x = fluid.layers.interpolate(x, out_shape=(inputs.shape[2], inputs.shape[3]))  # upsample back to the input (h, w)
        x = self.conv(x)  # convolution to produce per-class scores
        return x

def main():
    place = paddle.fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        model = BasicModel(num_classes=59)
        model.eval()
        input_data = np.random.rand(1, 3, 8, 8).astype(np.float32)  # a random np.array as input
        print('Input data shape: ', input_data.shape)
        input_data = to_variable(input_data)  # convert the np.array to a Paddle variable
        output_data = model(input_data)  # run the model to compute the output
        output_data = output_data.numpy()  # convert the output tensor back to numpy
        print('Output data shape: ', output_data.shape)

if __name__ == "__main__":
    main()
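Tracing the shapes: the 2x2 pool takes the 8x8 input down to 4x4, interpolate brings it back to 8x8, and the 1x1 convolution maps 3 channels to 59, so running the script should print Input data shape: (1, 3, 8, 8) followed by Output data shape: (1, 59, 8, 8).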
Paddle's data loading:
The framework already provides the data loading machinery; we only need to define our own BasicDataLoader class (a sample generator) and hand it to Paddle's loader.
import os
import random
import numpy as np
import cv2
import paddle.fluid as fluid

# data augmentation class
class Transform(object):
    def __init__(self, size=256):
        self.size = size

    def __call__(self, input, label):
        # resize both the input and the label
        input = cv2.resize(input, (self.size, self.size), interpolation=cv2.INTER_LINEAR)
        label = cv2.resize(label, (self.size, self.size), interpolation=cv2.INTER_NEAREST)
        return input, label

# basic data loader class
class BasicDataLoader(object):
    def __init__(self,
                 image_folder,
                 image_list_file,
                 transform=None,
                 shuffle=True):
        self.image_folder = image_folder
        self.image_list_file = image_list_file
        self.transform = transform
        self.shuffle = shuffle
        self.data_list = self.read_list()  # data_list holds the return value of read_list()

    def read_list(self):
        data_list = []
        with open(self.image_list_file) as infile:
            for line in infile:
                data_path = os.path.join(self.image_folder, line.split()[0])
                label_path = os.path.join(self.image_folder, line.split()[1])
                data_list.append((data_path, label_path))
        if self.shuffle:
            random.shuffle(data_list)
        return data_list

    def preprocess(self, data, label):
        h, w, c = data.shape
        h_gt, w_gt = label.shape
        assert h == h_gt, "ERROR"
        assert w == w_gt, "ERROR"
        if self.transform:
            data, label = self.transform(data, label)
        label = label[:, :, np.newaxis]  # add a channel dimension to the label
        return data, label

    # dunder overrides
    def __len__(self):
        return len(self.data_list)

    def __call__(self):
        for data_path, label_path in self.data_list:
            data = cv2.imread(data_path, cv2.IMREAD_COLOR)
            data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
            label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
            data, label = self.preprocess(data, label)
            yield data, label

def main():
    batch_size = 5
    place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        transform = Transform(256)
        # create a BasicDataLoader instance (our own sample generator)
        basic_dataloader = BasicDataLoader(
            image_folder=r"./work/dummy_data",
            image_list_file="./work/dummy_data/list.txt",
            transform=transform,
            shuffle=True
        )
        # create Paddle's fluid.io.DataLoader instance
        dataloader = fluid.io.DataLoader.from_generator(capacity=1, use_multiprocess=False)
        # hook our sample generator up to Paddle's data loader
        dataloader.set_sample_generator(basic_dataloader,
                                        batch_size=batch_size,
                                        places=place)
        num_epoch = 2
        for epoch in range(1, num_epoch + 1):
            print(f'Epoch [{epoch}/{num_epoch}]:')
            for idx, (data, label) in enumerate(dataloader):
                print(f'Iter {idx}, Data shape: {data.shape}, Label shape: {label.shape}')

if __name__ == "__main__":
    main()
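One assumption worth spelling out: read_list expects each line of list.txt to hold an image path and a label path, both relative to image_folder and separated by whitespace. A plausible list.txt for the dummy data (hypothetical lines, built from file names that appear elsewhere in this post) would look like:

JPEGImages/2008_000064.jpg GroundTruth_trainval_png/2008_000064.png
JPEGImages/2008_000026.jpg GroundTruth_trainval_png/2008_000026.png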
Data augmentation for training:
When training samples are scarce, use data augmentation (translation, cropping, scaling, flipping, normalization, and so on). These are mostly standard digital image processing operations built on OpenCV.
import cv2
import numpy as np

class Compose(object):  # chains the individual transforms
    def __init__(self, transforms):
        self.transforms = transforms  # the collection of transform ops

    def __call__(self, image, label=None):
        for t in self.transforms:
            image, label = t(image, label)
        return image, label

class Normalize(object):  # standardize pixel values
    def __init__(self, mean_val, std_val, val_scale=1):
        # set val_scale = 1 if mean and std are in range (0, 1)
        # set val_scale to another value if mean and std are in range (0, 255)
        self.mean = np.array(mean_val, dtype=np.float32)
        self.std = np.array(std_val, dtype=np.float32)
        self.val_scale = 1 / 255.0 if val_scale == 1 else 1

    def __call__(self, image, label=None):
        image = image.astype(np.float32)
        image = image * self.val_scale
        image = image - self.mean
        image = image * (1 / self.std)
        return image, label

class ConvertDataType(object):  # convert data types
    def __call__(self, image, label=None):
        if label is not None:
            label = label.astype(np.int64)  # label to int64
        return image.astype(np.float32), label  # image to float32

class Pad(object):
    def __init__(self, size, ignore_label=255, mean_val=0, val_scale=1):
        # set val_scale to 1 if mean_val is in range (0, 1)
        # set val_scale to 255 if mean_val is in range (0, 255)
        factor = 255 if val_scale == 1 else 1
        self.size = size
        self.ignore_label = ignore_label
        self.mean_val = mean_val
        # from 0-1 to 0-255
        if isinstance(self.mean_val, (tuple, list)):
            self.mean_val = [int(x * factor) for x in self.mean_val]
        else:
            self.mean_val = int(self.mean_val * factor)

    def __call__(self, image, label=None):
        h, w, c = image.shape
        pad_h = max(self.size - h, 0)
        pad_w = max(self.size - w, 0)
        pad_h_half = int(pad_h / 2)
        pad_w_half = int(pad_w / 2)
        if pad_h > 0 or pad_w > 0:
            image = cv2.copyMakeBorder(image,
                                       top=pad_h_half,
                                       left=pad_w_half,
                                       bottom=pad_h - pad_h_half,
                                       right=pad_w - pad_w_half,
                                       borderType=cv2.BORDER_CONSTANT,
                                       value=self.mean_val)
            if label is not None:
                label = cv2.copyMakeBorder(label,
                                           top=pad_h_half,
                                           left=pad_w_half,
                                           bottom=pad_h - pad_h_half,
                                           right=pad_w - pad_w_half,
                                           borderType=cv2.BORDER_CONSTANT,
                                           value=self.ignore_label)
        return image, label

class CenterCrop(object):  # crop a region of the given size around the image center
    def __init__(self, crop_size):
        self.crop_h = crop_size
        self.crop_w = crop_size

    def __call__(self, image, label=None):  # image is HWC
        h, w, c = image.shape
        h_start = (h - self.crop_h) // 2
        w_start = (w - self.crop_w) // 2
        image = image[h_start:h_start + self.crop_h, w_start:w_start + self.crop_w, :]
        if label is not None:
            label = label[h_start:h_start + self.crop_h, w_start:w_start + self.crop_w]
        return image, label

class Resize(object):
    def __init__(self, size):  # resize both sides to the given size
        self.size = size

    def __call__(self, image, label=None):
        image = cv2.resize(image, (self.size, self.size), interpolation=cv2.INTER_LINEAR)
        if label is not None:
            label = cv2.resize(label, (self.size, self.size), interpolation=cv2.INTER_NEAREST)
        return image, label

class RandomFlip(object):
    def __call__(self, image, label=None):
        prob_of_flip = np.random.rand()  # random draw deciding whether to flip horizontally
        if prob_of_flip > 0.5:
            image = cv2.flip(image, 1)
            if label is not None:
                label = cv2.flip(label, 1)
        return image, label

class RandomCrop(object):  # crop a random sub-image
    def __init__(self, crop_size):
        self.crop_size = crop_size

    def __call__(self, image, label=None):
        h, w, c = image.shape
        assert h >= self.crop_size, "Error: crop_size > image_height!"
        assert w >= self.crop_size, "Error: crop_size > image_width!"
        top = np.random.uniform(0, h - self.crop_size)  # note: uniform(low, high), not uniform(high)
        left = np.random.uniform(0, w - self.crop_size)
        rect = np.array([
            int(left), int(top), int(left + self.crop_size), int(top + self.crop_size)
        ])
        image = image[rect[1]:rect[3], rect[0]:rect[2], :]
        if label is not None:
            label = label[rect[1]:rect[3], rect[0]:rect[2]]
        return image, label

class Scale(object):
    def __call__(self, image, label=None, scale=1.0):
        if not isinstance(scale, (list, tuple)):
            scale = [scale, scale]
        h, w, c = image.shape
        image = cv2.resize(image, (int(w * scale[0]), int(h * scale[1])), interpolation=cv2.INTER_LINEAR)
        if label is not None:
            label = cv2.resize(label, (int(w * scale[0]), int(h * scale[1])), interpolation=cv2.INTER_NEAREST)
        return image, label

class RandomScale(object):  # randomly enlarge or shrink by some factor
    def __init__(self, min_scale=0.5, max_scale=2.0, step=0.25):
        self.min_scale = min_scale
        self.max_scale = max_scale
        self.step = step
        self.scale = Scale()

    def __call__(self, image, label=None):
        if self.step == 0:
            self.random_scale = np.random.uniform(self.min_scale, self.max_scale)
        else:
            num_steps = int((self.max_scale - self.min_scale) / self.step + 1)
            scale_factors = np.linspace(self.min_scale, self.max_scale, num_steps)
            np.random.shuffle(scale_factors)
            self.random_scale = scale_factors[0]
        image, label = self.scale(image, label, self.random_scale)
        return image, label

def main():
    image = cv2.imread('./work/dummy_data/JPEGImages/2008_000064.jpg')
    label = cv2.imread('./work/dummy_data/GroundTruth_trainval_png/2008_000064.png')
    crop_size = 256
    # transforms to demo: RandomScale, Scale, RandomFlip, Resize, Pad, RandomCrop, CenterCrop, ConvertDataType, Normalize
    transforms = [RandomScale(),
                  Scale(),
                  RandomFlip(),
                  Resize(100),
                  Pad(crop_size, mean_val=[0.485, 0.456, 0.406]),
                  RandomCrop(crop_size),
                  CenterCrop(crop_size),
                  ConvertDataType(),
                  Normalize(0, 1),
                  ]
    for i in range(len(transforms)):
        # wrap each single transform in Compose and apply it to the original image
        augment = Compose([transforms[i]])
        newimage, newlabel = augment(image, label)
        # save the result of each transform for inspection
        cv2.imwrite('new_image' + str(i) + '.png', newimage)
        cv2.imwrite('new_label' + str(i) + '.png', newlabel)

if __name__ == "__main__":
    main()
Model training:
The training loop is easy to understand: feed data into the model, compute the loss in the forward pass, backpropagate gradients, and take an optimizer step.
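The script below imports TrainAugmentation from basic_data_preprocessing, a file the post doesn't show. Here is a minimal sketch of what it might look like, assuming it simply composes the transforms from the previous section (the module name basic_augmentation and the exact transform list are my guesses, not the course's code):

# basic_data_preprocessing.py -- hypothetical sketch
from basic_augmentation import (Compose, RandomScale, RandomFlip, Pad,
                                RandomCrop, ConvertDataType, Normalize)

class TrainAugmentation(object):
    def __init__(self, image_size, mean_val=0, std_val=1.0):
        # chain the training-time augmentations defined earlier
        self.augment = Compose([RandomScale(),
                                RandomFlip(),
                                Pad(image_size, mean_val=mean_val),
                                RandomCrop(image_size),
                                ConvertDataType(),
                                Normalize(mean_val, std_val)])

    def __call__(self, image, label):
        return self.augment(image, label)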
import os
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
import numpy as np
import argparse
from utils import AverageMeter
from basic_model import BasicModel
from basic_dataloader import BasicDataLoader
from basic_seg_loss import Basic_SegLoss
from basic_data_preprocessing import TrainAugmentation

parser = argparse.ArgumentParser()
parser.add_argument('--net', type=str, default='basic')
parser.add_argument('--lr', type=float, default=0.001)
parser.add_argument('--num_epochs', type=int, default=10)
parser.add_argument('--batch_size', type=int, default=4)
parser.add_argument('--image_folder', type=str, default='./work/dummy_data')
parser.add_argument('--image_list_file', type=str, default='./work/dummy_data/list.txt')
parser.add_argument('--checkpoint_folder', type=str, default='./work/output')
parser.add_argument('--save_freq', type=int, default=2)
args = parser.parse_args()

def train(dataloader, model, criterion, optimizer, epoch, total_batch):
    model.train()
    train_loss_meter = AverageMeter()
    for batch_id, data in enumerate(dataloader):
        image = data[0]  # images
        label = data[1]  # labels
        image = fluid.layers.transpose(image, (0, 3, 1, 2))  # NHWC -> NCHW
        pred = model(image)            # forward pass
        loss = criterion(pred, label)  # compute loss
        loss.backward()                # backpropagate gradients
        optimizer.minimize(loss)       # optimizer step
        model.clear_gradients()        # reset gradients for the next batch
        n = image.shape[0]
        train_loss_meter.update(loss.numpy()[0], n)
        print(f"Epoch[{epoch:03d}/{args.num_epochs:03d}], " +
              f"Step[{batch_id:04d}/{total_batch:04d}], " +
              f"Average Loss: {train_loss_meter.avg:4f}")
    return train_loss_meter.avg

def main():
    # Step 0: preparation
    place = paddle.fluid.CUDAPlace(0)  # train on GPU (use fluid.CPUPlace() for CPU)
    with fluid.dygraph.guard(place):
        # Step 1: define the training dataloader
        transform = TrainAugmentation(256)  # data augmentation
        dataloader = BasicDataLoader(  # our custom sample generator
            image_folder=args.image_folder,
            image_list_file=args.image_list_file,
            transform=transform,
            shuffle=True
        )
        train_dataloader = fluid.io.DataLoader.from_generator(capacity=1, use_multiprocess=False)  # Paddle's data loader
        # hook our sample generator up to Paddle's data loader
        train_dataloader.set_sample_generator(dataloader,
                                              batch_size=args.batch_size,
                                              places=place)
        total_batch = int(len(dataloader) / args.batch_size)  # total samples / batch size = steps per epoch
        # Step 2: create the model
        if args.net == "basic":
            model = BasicModel(num_classes=59)  # 59 classes, matching the rest of this post
        else:
            raise NotImplementedError(f"args.net: {args.net} is not Supported!")
        # Step 3: define criterion and optimizer
        criterion = Basic_SegLoss
        optimizer = AdamOptimizer(learning_rate=args.lr, parameter_list=model.parameters())
        # Step 4: training
        for epoch in range(1, args.num_epochs + 1):
            train_loss = train(train_dataloader,
                               model,
                               criterion,
                               optimizer,
                               epoch,
                               total_batch)
            print(f"----- Epoch[{epoch}/{args.num_epochs}] Train Loss: {train_loss:.4f}")
            if epoch % args.save_freq == 0 or epoch == args.num_epochs:
                model_path = os.path.join(args.checkpoint_folder, f"{args.net}-Epoch-{epoch}-Loss-{train_loss}")
                # save model and optimizer states
                model_dict = model.state_dict()
                fluid.save_dygraph(model_dict, model_path)
                optimizer_dict = optimizer.state_dict()
                fluid.save_dygraph(optimizer_dict, model_path)
                print(f'----- Save model: {model_path}.pdparams')
                print(f'----- Save optimizer: {model_path}.pdopt')

if __name__ == "__main__":
    main()
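The script also relies on AverageMeter from utils, which isn't shown either. A minimal sketch, assuming the usual running-average helper (matching the update(value, n) / .avg usage above):

# utils.py -- hypothetical sketch of the running-average helper
class AverageMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0  # most recent value
        self.sum = 0.0  # weighted sum of values seen so far
        self.cnt = 0    # total weight (here: number of samples)
        self.avg = 0.0  # running average

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt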
Model loss:
This is just the basic softmax cross-entropy loss; an auxiliary loss (like the aux loss in PSPNet) could be added on top. Pixels whose label equals ignore_index are masked out, so the final value is the mean loss over valid pixels only.
import paddle
import paddle.fluid as fluid
import numpy as np
import cv2

eps = 1e-8

def Basic_SegLoss(preds, labels, ignore_index=255):
    n, c, h, w = preds.shape
    # the criterion combines softmax and cross-entropy
    criterion = fluid.layers.softmax_with_cross_entropy
    # transpose preds from NCHW to NHWC to match the label layout
    preds = fluid.layers.transpose(preds, (0, 2, 3, 1))
    # mask out ignored pixels
    mask = labels != ignore_index
    mask = fluid.layers.cast(mask, 'float32')
    # compute the per-pixel loss, zero it on ignored pixels, and average over valid pixels
    loss = criterion(logits=preds, label=labels)
    loss = loss * mask
    avg_loss = fluid.layers.mean(loss) / (fluid.layers.mean(mask) + eps)
    return avg_loss

def main():
    label = cv2.imread('work/dummy_data/GroundTruth_trainval_png/2008_000026.png')  # ground-truth label
    label = cv2.cvtColor(label, cv2.COLOR_BGR2GRAY).astype(np.int64)  # convert the label to grayscale
    pred = np.random.uniform(0, 1, (1, 59, label.shape[0], label.shape[1])).astype(np.float32)  # fake prediction, shape (n, c, h, w)
    label = label[:, :, np.newaxis]     # add a channel dimension
    label = label[np.newaxis, :, :, :]  # add a batch dimension -> (1, h, w, 1)
    with fluid.dygraph.guard(fluid.CPUPlace()):
        pred = fluid.dygraph.to_variable(pred)  # convert to Paddle variables
        label = fluid.dygraph.to_variable(label)
        loss = Basic_SegLoss(pred, label)  # compute the segmentation loss
        print(loss)

if __name__ == "__main__":
    main()
Summary:
In principle, any Paddle model can be built and trained following the workflow above. Haha, very happy, and I'm looking forward to seeing my own predictions (sadly that part is still a weak spot; staring at a trained model without knowing how to run inference is painful. Any expert willing to teach me?)
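For anyone stuck at the same point, here is a minimal inference sketch I put together myself (my own rough attempt, not the course's solution): load the saved parameters with fluid.load_dygraph, run a forward pass in eval mode, and take the argmax over the class channel to get a per-pixel label map. The checkpoint name is hypothetical, and the simple 0-1 scaling should be replaced by whatever normalization was used in training:

import numpy as np
import cv2
import paddle.fluid as fluid
from basic_model import BasicModel

def predict(image_path, model_path, num_classes=59, size=256):
    with fluid.dygraph.guard(fluid.CPUPlace()):
        model = BasicModel(num_classes=num_classes)
        params, _ = fluid.load_dygraph(model_path)  # pass the path without the .pdparams suffix
        model.set_dict(params)
        model.eval()
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (size, size), interpolation=cv2.INTER_LINEAR)
        image = (image / 255.0).astype(np.float32)  # simple 0-1 scaling; match your training normalization
        image = image.transpose(2, 0, 1)[np.newaxis, :, :, :]  # HWC -> NCHW, add batch dim
        logits = model(fluid.dygraph.to_variable(image))  # shape (1, num_classes, h, w)
        pred = np.argmax(logits.numpy(), axis=1)[0].astype(np.uint8)  # per-pixel class ids
        return pred

if __name__ == "__main__":
    # hypothetical checkpoint name; use one actually saved by the training script
    pred = predict('./work/dummy_data/JPEGImages/2008_000064.jpg',
                   './work/output/basic-Epoch-10-Loss-0.5')
    cv2.imwrite('pred.png', pred)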