Street-scene semantic segmentation with TensorFlow 2.3
The Cityscapes benchmark dataset is an urban street-scene dataset released in 2015 with backing from Mercedes-Benz (Daimler), and it remains one of the most authoritative and widely used image segmentation datasets in computer vision. It provides semantic, instance-level, and dense pixel annotations for 30 classes in 8 groups (flat surfaces, humans, vehicles, constructions, objects, nature, sky, and void). Cityscapes contains 5000 finely annotated images of driving scenes in urban environments (2975 train, 500 val, 1525 test). Dense pixel annotations cover 19 evaluation classes (97% coverage), 8 of which also carry instance-level segmentation. The data was collected in 50 cities over several months, spanning different times of day under good weather conditions. Since the raw recordings are video, frames were manually selected to favor a large number of dynamic objects, varied scene layouts, and varied backgrounds.
Code
Import packages
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import glob
Pin the GPU, set up memory allocation, and check the TensorFlow version
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
tf.__version__
- '2.3.0'
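The heading above also mentions adaptive GPU memory allocation, which the snippet does not set up. A minimal sketch using the tf.config API (optional; it lets TensorFlow grow memory on demand instead of reserving it all at start-up):

gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)  # allocate GPU memory on demand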
Read the data: image paths
img = glob.glob('./dataset/cityscapes/leftImg8bit/train/*/*.png')
print(len(img))
img[:5]
- 2975
- ['./dataset/cityscapes/leftImg8bit/train/dusseldorf/dusseldorf_000128_000019_leftImg8bit.png',
 './dataset/cityscapes/leftImg8bit/train/dusseldorf/dusseldorf_000113_000019_leftImg8bit.png',
 './dataset/cityscapes/leftImg8bit/train/dusseldorf/dusseldorf_000014_000019_leftImg8bit.png',
 './dataset/cityscapes/leftImg8bit/train/dusseldorf/dusseldorf_000207_000019_leftImg8bit.png',
 './dataset/cityscapes/leftImg8bit/train/dusseldorf/dusseldorf_000216_000019_leftImg8bit.png']
Label paths
label = glob.glob('./dataset/cityscapes/gtFine/train/*/*_gtFine_labelIds.png')
print(len(label))
label[:5]
- 2975
- ['./dataset/cityscapes/gtFine/train/dusseldorf/dusseldorf_000015_000019_gtFine_labelIds.png',
 './dataset/cityscapes/gtFine/train/dusseldorf/dusseldorf_000213_000019_gtFine_labelIds.png',
 './dataset/cityscapes/gtFine/train/dusseldorf/dusseldorf_000164_000019_gtFine_labelIds.png',
 './dataset/cityscapes/gtFine/train/dusseldorf/dusseldorf_000050_000019_gtFine_labelIds.png',
 './dataset/cityscapes/gtFine/train/dusseldorf/dusseldorf_000072_000019_gtFine_labelIds.png']
To guarantee that images and labels correspond one-to-one, sort both lists by file name.
img.sort(key=lambda x: x.split('/')[-1].split('.png')[0])
label.sort(key=lambda x: x.split('/')[-1].split('.png')[0])
Create a shuffled index
index = np.random.permutation(len(img))
Apply the same shuffle to images and labels
img = np.array(img)[index]
label = np.array(label)[index]
After shuffling, each image still lines up with its label, as the sanity check below confirms.
img[:5]
- array(['./dataset/cityscapes/leftImg8bit/train/stuttgart/stuttgart_000195_000019_leftImg8bit.png',
 './dataset/cityscapes/leftImg8bit/train/tubingen/tubingen_000047_000019_leftImg8bit.png',
 './dataset/cityscapes/leftImg8bit/train/monchengladbach/monchengladbach_000000_019682_leftImg8bit.png',
 './dataset/cityscapes/leftImg8bit/train/dusseldorf/dusseldorf_000075_000019_leftImg8bit.png',
 './dataset/cityscapes/leftImg8bit/train/monchengladbach/monchengladbach_000000_010733_leftImg8bit.png'], dtype='<U158')
label[:5]
- array(['./dataset/cityscapes/gtFine/train/stuttgart/stuttgart_000195_000019_gtFine_labelIds.png',
 './dataset/cityscapes/gtFine/train/tubingen/tubingen_000047_000019_gtFine_labelIds.png',
 './dataset/cityscapes/gtFine/train/monchengladbach/monchengladbach_000000_019682_gtFine_labelIds.png',
 './dataset/cityscapes/gtFine/train/dusseldorf/dusseldorf_000075_000019_gtFine_labelIds.png',
 './dataset/cityscapes/gtFine/train/monchengladbach/monchengladbach_000000_010733_gtFine_labelIds.png'], dtype='<U157')
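A quick sanity check (a sketch; it relies on the Cityscapes naming scheme, where the first three underscore-separated fields of a file name are city_sequence_frame):

# Every image/label pair should share the same city_sequence_frame prefix.
for i_path, l_path in zip(img, label):
    assert i_path.split('/')[-1].split('_')[:3] == l_path.split('/')[-1].split('_')[:3]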
Create the validation set
img_val = glob.glob('./dataset/cityscapes/leftImg8bit/val/*/*.png')
label_val = glob.glob('./dataset/cityscapes/gtFine/val/*/*_gtFine_labelIds.png')
len(img_val), len(label_val)
- (500, 500)
Sort the validation images and labels by name as well
img_val.sort(key=lambda x: x.split('/')[-1].split('.png')[0])
label_val.sort(key=lambda x: x.split('/')[-1].split('.png')[0])
Number of validation samples
val_count = len(img_val)
val_count
- 500
Number of training samples
train_count = len(img)
train_count
- 2975
Build the training dataset
dataset_train = tf.data.Dataset.from_tensor_slices((img, label))
dataset_train
- <TensorSliceDataset shapes: ((), ()), types: (tf.string, tf.string)>
Build the validation dataset
dataset_val = tf.data.Dataset.from_tensor_slices((img_val, label_val))
dataset_val
- <TensorSliceDataset shapes: ((), ()), types: (tf.string, tf.string)>
Wrap image decoding in a function
def read_png(path):
    img = tf.io.read_file(path)
    img = tf.image.decode_png(img, channels=3)  # 3-channel RGB image
    return img
Wrap label decoding in a function
def read_png_label(path):
    img = tf.io.read_file(path)
    img = tf.image.decode_png(img, channels=1)  # single channel of class IDs
    return img
Test the decode functions on one sample
img_1 = read_png(img[0])
label_1 = read_png_label(label[0])
img_1.shape
- TensorShape([1024, 2048, 3])
label_1.shape
- TensorShape([1024, 2048, 1])
plt.imshow(img_1)
plt.imshow(np.squeeze(label_1.numpy()))  # drop the channel dim; imshow rejects (H, W, 1)
Data augmentation
concat = tf.concat([img_1, label_1], axis=-1)
concat.shape
- TensorShape([1024, 2048, 4])
After tf.concat, the stacked tensor has 4 channels (3 + 1 = 4), so a single random crop stays aligned across image and label.
Custom augmentation function
def crop_img(img, mask):
    concat_img = tf.concat([img, mask], axis=-1)  # stack image and mask so both get the same crop
    concat_img = tf.image.resize(concat_img, (280, 280),
                                 method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)  # nearest neighbor keeps label IDs intact
    crop_img = tf.image.random_crop(concat_img, [256, 256, 4])  # random 256x256 crop
    return crop_img[:, :, :3], crop_img[:, :, 3:]
The function slices the cropped stack back into the image channels and the label channel before returning them.
Try it out
img_1, label_1 = crop_img(img_1, label_1)
img_1.shape, label_1.shape
- (TensorShape([256, 256, 3]), TensorShape([256, 256, 1]))
plt.subplot(1,2,1)
plt.imshow(img_1.numpy())
plt.subplot(1,2,2)
plt.imshow(np.squeeze(label_1.numpy()))  # squeeze out the channel dim for imshow
Casting and normalization
def normal(img, mask):
    img = tf.cast(img, tf.float32) / 127.5 - 1  # scale pixels to [-1, 1]
    mask = tf.cast(mask, tf.int32)  # integer class IDs for the sparse loss
    return img, mask
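This maps pixel values from [0, 255] to [-1, 1]: 0 → -1, 127.5 → 0, 255 → 1. The mask keeps its raw integer class IDs.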
Wrap the training-set loading and preprocessing
def load_image_train(img_path, mask_path):
    img = read_png(img_path)
    mask = read_png_label(mask_path)
    img, mask = crop_img(img, mask)
    if tf.random.uniform(()) > 0.5:  # random horizontal flip, applied to image and mask together
        img = tf.image.flip_left_right(img)
        mask = tf.image.flip_left_right(mask)
    img, mask = normal(img, mask)
    return img, mask
Wrap the validation-set loading and preprocessing; no augmentation is applied here
def load_image_val(img_path, mask_path):
    img = read_png(img_path)
    mask = read_png_label(mask_path)
    img = tf.image.resize(img, (256, 256))
    mask = tf.image.resize(mask, (256, 256), method='nearest')  # bilinear would invent invalid label IDs
    img, mask = normal(img, mask)
    return img, mask
Map the preprocessing functions over both datasets
dataset_train = dataset_train.map(load_image_train, num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset_val = dataset_val.map(load_image_val, num_parallel_calls=tf.data.experimental.AUTOTUNE)
Set the batch size and step counts
BATCH_SIZE = 32
BUFFER_SIZE = 128
STEP_PER_EPOCH = train_count //BATCH_SIZE
VALIDATION_STEP = val_count //BATCH_SIZE
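With 2975 training and 500 validation samples, this gives STEP_PER_EPOCH = 2975 // 32 = 92 and VALIDATION_STEP = 500 // 32 = 15.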
Configure the input pipelines
dataset_train = dataset_train.cache().repeat().shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
dataset_train = dataset_train.prefetch(tf.data.experimental.AUTOTUNE)
dataset_val = dataset_val.cache().batch(BATCH_SIZE)
dataset_train
- <PrefetchDataset shapes: ((None, 256, 256, 3), (None, 256, 256, 1)), types: (tf.float32, tf.int32)>
dataset_val
- <BatchDataset shapes: ((None, 256, 256, 3), (None, 256, 256, 1)), types: (tf.float32, tf.int32)>
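Because the training pipeline repeats indefinitely, model.fit below needs steps_per_epoch to delimit an epoch; the finite validation set is only cached and batched.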
Define the model
Class IDs present in label_1
np.unique(label_1.numpy())
- array([ 1, 4, 7, 11, 14, 15, 17, 20, 21, 22, 23, 26, 27], dtype=uint8)
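These are raw gtFine labelIds in the range 0-33, which is why the model below predicts 34 classes. As an aside, the official benchmark scores only 19 "trainIds"; a hedged sketch of that remapping (the table follows the cityscapesScripts label definitions; to_train_ids is hypothetical and unused in the rest of this post):

# Map the 34 raw labelIds to the 19 benchmark trainIds;
# any id not listed collapses into an ignore class, 19.
ID_TO_TRAINID = np.full(34, 19, dtype=np.int32)
for train_id, label_id in enumerate([7, 8, 11, 12, 13, 17, 19, 20, 21, 22,
                                     23, 24, 25, 26, 27, 28, 31, 32, 33]):
    ID_TO_TRAINID[label_id] = train_id

def to_train_ids(mask):
    return tf.gather(ID_TO_TRAINID, tf.cast(mask, tf.int32))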
def create_model():
    inputs = tf.keras.layers.Input(shape=(256, 256, 3))
    # Downsampling path (encoder)
    x = tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu')(inputs)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu')(x)
    x = tf.keras.layers.BatchNormalization()(x)  # 256*256*64
    x1 = tf.keras.layers.MaxPooling2D()(x)  # 128*128*64
    x1 = tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu')(x1)
    x1 = tf.keras.layers.BatchNormalization()(x1)  # 128*128*128
    x1 = tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu')(x1)
    x1 = tf.keras.layers.BatchNormalization()(x1)  # 128*128*128
    x2 = tf.keras.layers.MaxPooling2D()(x1)  # 64*64*128
    x2 = tf.keras.layers.Conv2D(256, (3, 3), padding='same', activation='relu')(x2)
    x2 = tf.keras.layers.BatchNormalization()(x2)  # 64*64*256
    x2 = tf.keras.layers.Conv2D(256, (3, 3), padding='same', activation='relu')(x2)
    x2 = tf.keras.layers.BatchNormalization()(x2)  # 64*64*256
    x3 = tf.keras.layers.MaxPooling2D()(x2)  # 32*32*256
    x3 = tf.keras.layers.Conv2D(512, (3, 3), padding='same', activation='relu')(x3)
    x3 = tf.keras.layers.BatchNormalization()(x3)  # 32*32*512
    x3 = tf.keras.layers.Conv2D(512, (3, 3), padding='same', activation='relu')(x3)
    x3 = tf.keras.layers.BatchNormalization()(x3)  # 32*32*512
    x4 = tf.keras.layers.MaxPooling2D()(x3)  # 16*16*512
    x4 = tf.keras.layers.Conv2D(1024, (3, 3), padding='same', activation='relu')(x4)
    x4 = tf.keras.layers.BatchNormalization()(x4)  # 16*16*1024
    x4 = tf.keras.layers.Conv2D(1024, (3, 3), padding='same', activation='relu')(x4)
    x4 = tf.keras.layers.BatchNormalization()(x4)  # 16*16*1024
    # Upsampling path (decoder) with skip connections
    x5 = tf.keras.layers.Conv2DTranspose(512, (2, 2), padding='same', strides=2, activation='relu')(x4)  # 32*32*512
    x5 = tf.keras.layers.BatchNormalization()(x5)
    x6 = tf.concat([x5, x3], axis=-1)  # 32*32*1024
    x6 = tf.keras.layers.Conv2D(512, (3, 3), padding='same', activation='relu')(x6)  # 32*32*512
    x6 = tf.keras.layers.BatchNormalization()(x6)
    x6 = tf.keras.layers.Conv2D(512, (3, 3), padding='same', activation='relu')(x6)  # 32*32*512
    x6 = tf.keras.layers.BatchNormalization()(x6)
    x7 = tf.keras.layers.Conv2DTranspose(256, (2, 2), padding='same', strides=2, activation='relu')(x6)  # 64*64*256
    x7 = tf.keras.layers.BatchNormalization()(x7)
    x8 = tf.concat([x7, x2], axis=-1)  # 64*64*512
    x8 = tf.keras.layers.Conv2D(256, (3, 3), padding='same', activation='relu')(x8)  # 64*64*256
    x8 = tf.keras.layers.BatchNormalization()(x8)
    x8 = tf.keras.layers.Conv2D(256, (3, 3), padding='same', activation='relu')(x8)  # 64*64*256
    x8 = tf.keras.layers.BatchNormalization()(x8)
    x9 = tf.keras.layers.Conv2DTranspose(128, (2, 2), padding='same', strides=2, activation='relu')(x8)  # 128*128*128
    x9 = tf.keras.layers.BatchNormalization()(x9)
    x10 = tf.concat([x9, x1], axis=-1)  # 128*128*256
    x10 = tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu')(x10)  # 128*128*128
    x10 = tf.keras.layers.BatchNormalization()(x10)
    x10 = tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu')(x10)  # 128*128*128
    x10 = tf.keras.layers.BatchNormalization()(x10)
    x11 = tf.keras.layers.Conv2DTranspose(64, (2, 2), padding='same', strides=2, activation='relu')(x10)  # 256*256*64
    x11 = tf.keras.layers.BatchNormalization()(x11)
    x12 = tf.concat([x11, x], axis=-1)  # 256*256*128
    x12 = tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu')(x12)  # 256*256*64
    x12 = tf.keras.layers.BatchNormalization()(x12)
    x12 = tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu')(x12)  # 256*256*64
    x12 = tf.keras.layers.BatchNormalization()(x12)
    output = tf.keras.layers.Conv2D(34, (1, 1), padding='same', activation='softmax')(x12)  # 256*256*34
    return tf.keras.Model(inputs=inputs, outputs=output)
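Each stage repeats the same two-layer Conv-BN pattern, so the definition above could be condensed. Purely as a refactoring sketch (conv_block is hypothetical and not used elsewhere in this post):

def conv_block(x, filters):
    # Two 3x3 Conv + BatchNorm layers: the repeating unit in create_model.
    for _ in range(2):
        x = tf.keras.layers.Conv2D(filters, (3, 3), padding='same', activation='relu')(x)
        x = tf.keras.layers.BatchNormalization()(x)
    return x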
Instantiate the model
model = create_model()
tf.keras.metrics.MeanIoU(num_classes=34)  # the built-in metric expects hard class labels, not probabilities
Our model outputs per-class probabilities for sparse (ordinal) labels, so we subclass the metric and take the argmax inside update_state, the method Keras actually calls during training:
class MeanIoU(tf.keras.metrics.MeanIoU):
    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.argmax(y_pred, axis=-1)  # probabilities -> predicted class IDs
        return super().update_state(y_true, y_pred, sample_weight)
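A quick smoke test of the subclassed metric on one validation batch (a sketch; before training, the score will be near random):

miou = MeanIoU(num_classes=34)
for image, mask in dataset_val.take(1):
    pred = model.predict(image)    # (batch, 256, 256, 34) class probabilities
    miou.update_state(mask, pred)  # the subclass argmaxes internally
print(miou.result().numpy())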
Compile the model, tracking both pixel accuracy and the custom mean IoU
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['acc', MeanIoU(num_classes=34)])
Set the number of epochs
EPOCHS=60
Train the model
history = model.fit(dataset_train,
epochs=EPOCHS,
steps_per_epoch=STEP_PER_EPOCH,
validation_data=dataset_val,
validation_steps=VALIDATION_STEP)
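Sixty epochs is a long run, so checkpointing the best weights is cheap insurance. A hedged sketch (the file name 'unet_best.h5' is an assumption):

# Save the weights whenever validation loss improves.
checkpoint = tf.keras.callbacks.ModelCheckpoint('unet_best.h5',
                                                monitor='val_loss',
                                                save_best_only=True)
# Pass callbacks=[checkpoint] to model.fit above to enable it.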
Visualize the training loss
plt.plot(history.epoch, history.history.get('loss'), 'r', label='Training loss')
plt.plot(history.epoch, history.history.get('val_loss'), 'b', label='Validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss Value')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()
Visualize the training accuracy
plt.plot(history.epoch, history.history.get('acc'), 'r', label='Training acc')
plt.plot(history.epoch, history.history.get('val_acc'), 'b', label='Validation acc')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()
plt.show()
Prediction
num = 3  # number of examples to visualize

for image, mask in dataset_train.take(1):
    pred_mask = model.predict(image)
    pred_mask = tf.argmax(pred_mask, axis=-1)
    pred_mask = pred_mask[..., tf.newaxis]  # restore the channel dim for display
    plt.figure(figsize=(10, 10))
    for i in range(num):
        plt.subplot(num, 3, i * 3 + 1)  # input image
        plt.imshow(tf.keras.preprocessing.image.array_to_img(image[i]))
        plt.subplot(num, 3, i * 3 + 2)  # ground-truth mask
        plt.imshow(tf.keras.preprocessing.image.array_to_img(mask[i]))
        plt.subplot(num, 3, i * 3 + 3)  # predicted mask
        plt.imshow(tf.keras.preprocessing.image.array_to_img(pred_mask[i]))
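Pointing the same loop at dataset_val instead of dataset_train visualizes predictions on images the model has never seen, which is the more telling check.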