常见图像处理的任务
1、分类
给定一幅图像,我们用计算机模型预测图片中有什么对象。
2、分类与定位
我们不仅要知道图片中的对象是什么,还要在对象的附近画一个边框,确定该对象所处的位置。
3、语义分割
对图中的每一个像素点进行分类,而不仅仅是用矩形框把对象框住。
4、目标检测
目标检测简单来说就是回答图片里面有什么?分别在哪里?(并把它们使用矩形框框住)
5、实例分割
实例分割是目标检测和语义分割的结合。相对目标检测的边界框,实例分割可精确到物体的边缘;相对语义分割, 实例分割需要标出图上同一物体的不同个体。
图像定位
对于单纯的分类问题,比较容易理解,给定一幅图片,我们输出一个标签类别,已经很熟悉。
定位比较复杂,需要输出四个数字(x,y,w,h):边框上某一个点(例如左上角)的坐标(x,y),以及边框的宽度和高度。有了这四个数字,我们可以很容易地找到物体的边框。
Oxford-IIIT数据集
The Oxford-IIIT Pet Dataset是一个宠物图像数据集,包含37种宠物,每种宠物200张左右宠物图片,该数据集同时包含宠物分类、头部轮廓标注和语义分割信息。
先来看看图片定位
# -*- coding: UTF-8 -*-
"""
Author: LGD
FileName: image_position
DateTime: 2020/12/25 09:45
SoftWare: PyCharm
"""
import tensorflow as tf
import matplotlib.pyplot as plt
from lxml import etree
import numpy as np
import glob
from matplotlib.patches import Rectangle
# Report the TensorFlow version and whether a GPU device is visible.
# tf.test.is_gpu_available() is deprecated; the device list is the
# documented replacement.
print(tf.__version__)
print(bool(tf.config.list_physical_devices('GPU')))

# Load one sample image and display it.
img = tf.io.read_file('images/Abyssinian_1.jpg')
img = tf.image.decode_jpeg(img)
print(img.shape)
plt.imshow(img)
plt.show()

# Read the matching annotation file; the context manager closes the handle,
# which the previous bare open(...).read() never did.
with open('annotations/xmls/Abyssinian_1.xml') as xml_file:
    xml = xml_file.read()
sel = etree.HTML(xml)

# Image size recorded in the annotation.
width = int(sel.xpath('//size/width/text()')[0])
print(width)
height = int(sel.xpath('//size/height/text()')[0])

# Corners of the annotated bounding box, in pixels of the original image.
xmin = int(sel.xpath('//bndbox/xmin/text()')[0])
xmax = int(sel.xpath('//bndbox/xmax/text()')[0])
ymin = int(sel.xpath('//bndbox/ymin/text()')[0])
ymax = int(sel.xpath('//bndbox/ymax/text()')[0])
print(height, xmin, xmax, ymin, ymax)

# Draw the bounding box on top of the image.
plt.imshow(img)
rect = Rectangle((xmin, ymin), (xmax - xmin), (ymax - ymin), fill=False, color='red')
ax = plt.gca()
ax.add_patch(rect)
plt.show()
图片缩放与目标值的规范
# Resize the sample to 224x224 and divide the pixel values by 255.
img = tf.image.resize(img, [224, 224]) / 255
plt.imshow(img)
plt.show()

# Express each box corner as a fraction of the original image size, then
# scale it to the 224-pixel canvas.
xmin, xmax = (xmin / width) * 224, (xmax / width) * 224
ymin, ymax = (ymin / height) * 224, (ymax / height) * 224

# Show the resized image with the rescaled box.
plt.imshow(img)
rect = Rectangle(
    (xmin, ymin),
    (xmax - xmin),
    (ymax - ymin),
    fill=False,
    color='red',
)
ax = plt.gca()
ax.add_patch(rect)
plt.show()
模型训练
# -*- coding: UTF-8 -*-
"""
Author: LGD
FileName: image_position
DateTime: 2020/12/25 09:45
SoftWare: PyCharm
"""
import tensorflow as tf
import matplotlib.pyplot as plt
from lxml import etree
import numpy as np
import glob
from matplotlib.patches import Rectangle
import os  # used only by this section, for separator-independent file names

# Report the TensorFlow version and whether a GPU device is visible.
# tf.test.is_gpu_available() is deprecated; the device list is the
# documented replacement.
print(tf.__version__)
print(bool(tf.config.list_physical_devices('GPU')))


def _stem(path):
    """Return the file name of *path* without its directory or extension."""
    return os.path.splitext(os.path.basename(path))[0]


# Build the input pipeline: collect every image and every annotation file.
images = glob.glob('images/*.jpg')
print(images[:5])
print(images[-5:])
print(len(images))
xmls = glob.glob('annotations/xmls/*.xml')
print(xmls[:5])
print(xmls[-5:])
print(len(xmls))

# Base names of the annotated samples. os.path handles the platform's
# separators, where splitting on '\\' only worked for Windows-style paths.
names = [_stem(x) for x in xmls]
print(names, len(names))

# A set gives constant-time membership tests; `names` stays a list so the
# print above is unchanged.
annotated = set(names)

# Images with an annotation form the training set, the rest the test set.
imgs_train = [img for img in images if _stem(img) in annotated]
print(imgs_train, len(imgs_train))
imgs_test = [img for img in images if _stem(img) not in annotated]

# Sort both lists on the same key so that the i-th image and the i-th
# annotation belong to the same sample. The annotation list used to be
# sorted on a key that stripped '.jpg' instead of '.xml'.
imgs_train.sort(key=_stem)
xmls.sort(key=_stem)
print(imgs_train[-5:])
print(xmls[-5:])
def to_labels(path):
    """Read one annotation XML file and return its normalised bounding box.

    Args:
        path: Path of the annotation XML file to read.

    Returns:
        A list ``[xmin / width, ymin / height, xmax / width, ymax / height]``
        built from the first ``size`` and ``bndbox`` values found in the file.

    Raises:
        IndexError: If one of the queried elements yields no match.
        ValueError: If a matched value is not an integer literal.
        ZeroDivisionError: If the recorded width or height is zero.
    """
    # The context manager closes the file; the previous open(...).read()
    # left one handle open per annotation.
    with open(path) as xml_file:
        sel = etree.HTML(xml_file.read())

    def first_int(query):
        # Each query is expected to match at least once; take the first hit.
        return int(sel.xpath(query)[0])

    width = first_int('//size/width/text()')
    height = first_int('//size/height/text()')
    xmin = first_int('//bndbox/xmin/text()')
    xmax = first_int('//bndbox/xmax/text()')
    ymin = first_int('//bndbox/ymin/text()')
    ymax = first_int('//bndbox/ymax/text()')

    # Dividing by the image size makes the targets independent of resolution.
    return [xmin / width, ymin / height, xmax / width, ymax / height]
# Parse every annotation file into its four normalised box values.
labels = list(map(to_labels, xmls))
print(labels[:3])

# Transpose the per-sample rows into four per-coordinate columns.
out1, out2, out3, out4 = zip(*labels)
print(len(out1), len(out2), len(out3), len(out4))

# One NumPy array per coordinate, combined into a dataset of 4-tuples.
out1, out2, out3, out4 = (
    np.array(column) for column in (out1, out2, out3, out4)
)
label_datasets = tf.data.Dataset.from_tensor_slices((out1, out2, out3, out4))
print('label_datasets: ', label_datasets)
def load_image(path):
    """Load the JPEG at *path* as a 224x224 three-channel tensor.

    The resized pixel values are divided by 127.5 and shifted down by 1
    before being returned.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (224, 224))
    return resized / 127.5 - 1
# Dataset of preprocessed training images, in the order of imgs_train.
image_dataset = tf.data.Dataset.from_tensor_slices(imgs_train)
image_dataset = image_dataset.map(load_image)
print(image_dataset)

# Pair every image with its four box coordinates.
dataset = tf.data.Dataset.zip((image_dataset, label_datasets))

# Shuffle before repeating so that each pass over the data is a complete
# permutation. Repeating first let samples from different passes mix inside
# the shuffle buffer, so a sample could recur before others were seen once.
dataset = dataset.shuffle(len(imgs_train)).repeat().batch(32)

# Sanity check: draw the first sample of one batch with its box. The loop
# uses its own names so the module-level out1..out4 and img stay untouched.
for batch_images, batch_labels in dataset.take(1):
    plt.imshow(tf.keras.preprocessing.image.array_to_img(batch_images[0]))
    # Labels are fractions of the image size; scale them to 224 pixels.
    xmin, ymin, xmax, ymax = (
        coord[0].numpy() * 224 for coord in batch_labels
    )
    rect = Rectangle((xmin, ymin), (xmax - xmin), (ymax - ymin), fill=False, color='red')
    ax = plt.gca()
    ax.add_patch(rect)
    plt.show()
# Model construction
# Xception without its classification top, initialised with ImageNet weights.
xception = tf.keras.applications.Xception(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

inputs = tf.keras.layers.Input(shape=(224, 224, 3))
x = xception(inputs)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
# Two fully connected ReLU layers on top of the pooled features.
for units in (2048, 256):
    x = tf.keras.layers.Dense(units, activation='relu')(x)

# Four independent single-unit outputs, one per element of the label tuple.
prediction = [tf.keras.layers.Dense(1)(x) for _ in range(4)]
out1, out2, out3, out4 = prediction

model = tf.keras.models.Model(inputs=inputs, outputs=prediction)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss='mse',
    metrics=['mae'],
)

# The dataset repeats indefinitely, so the length of an epoch is set
# explicitly from the number of training images and the batch size of 32.
EPOCHS = 50
history = model.fit(
    dataset,
    epochs=EPOCHS,
    steps_per_epoch=len(imgs_train) // 32,
)