1 准备
1.1 数据集
数据集使用 Kaggle 上的 Facial Keypoints Detection;若因无法获取验证码而不能从 Kaggle 下载,可以改从聚数力数据集获取。
1.2安装包准备
pip install opencv-python
如果 cv2 包安装后仍无法导入,可在 PyCharm 中这样解决:Project Interpreter → Show All → 点击最下面一个按钮 → 点击 + → 把 Python 路径下的 site-packages 目录添加进去(列表中显示“由用户添加”即表示添加成功)。
2 人脸检测
复现该博主代码:手把手教你做人脸识别和关键点检测(基于tensorflow和opencv) 新建face detection.py.
import cv2
def ad_threshold(img):
    """Binarize an image with OpenCV's Gaussian adaptive threshold.

    Args:
        img: Image handed unchanged to ``cv2.adaptiveThreshold``.

    Returns:
        The thresholded image produced by OpenCV.
    """
    max_value = 255   # value written where the threshold test passes
    block_size = 19   # side length of the pixel neighbourhood
    offset = 4        # constant subtracted from the weighted mean
    return cv2.adaptiveThreshold(
        img,
        max_value,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        block_size,
        offset,
    )
def CatchUsbVideo(window_name, camera_index):
    """Show a live camera preview with every detected face outlined.

    Frames are read from the camera, converted to grayscale, adaptively
    thresholded and passed to a Haar cascade face detector. Each detection
    is marked with a rectangle and a ``'face'`` label. The loop ends when
    ``q`` is pressed or the preview window is closed.

    Args:
        window_name: Title of the OpenCV preview window.
        camera_index: Index of the capture device to open (the built-in
            laptop camera is usually 0).

    Raises:
        Exception: If a frame cannot be read from the camera.
    """
    cv2.namedWindow(window_name)
    # Open the requested device; the index used to be hard-coded to 0,
    # which silently ignored ``camera_index``.
    cap = cv2.VideoCapture(camera_index, cv2.CAP_DSHOW)
    # Pre-trained frontal-face Haar cascade shipped with OpenCV (found in
    # the ``cv2/data`` folder of the installation), copied next to the script.
    classfier = cv2.CascadeClassifier(
        'F:/try/face detect/haarcascade_frontalface_alt2.xml')
    color = (0, 255, 0)  # colour of the face rectangle
    font = cv2.FONT_HERSHEY_SIMPLEX
    try:
        while cap.isOpened():
            catch, frame = cap.read()
            if not catch:
                raise Exception('Check if the camera if on.')
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            gray = ad_threshold(gray)
            # scaleFactor: image pyramid step between scales.
            # minNeighbors: overlapping hits required to accept a face;
            #   raising it suppresses false positives.
            # minSize: smallest face size that is reported.
            faceRects = classfier.detectMultiScale(
                gray, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))
            # Iterating an empty result is a no-op, so no length check.
            for x, y, w, h in faceRects:
                cv2.rectangle(frame, (x - 5, y - 5),
                              (x + w + 5, y + h + 5), color, 2)
                cv2.putText(frame, 'face', (x + 30, y + 30),
                            font, 1, (255, 0, 255), 4)
            cv2.imshow(window_name, frame)
            c = cv2.waitKey(10)
            if c & 0xFF == ord('q'):
                break  # 'q' pressed
            if cv2.getWindowProperty(window_name, cv2.WND_PROP_AUTOSIZE) < 1:
                break  # window closed with the X button
    finally:
        # Always free the camera and close the window, including when the
        # read error above is raised.
        cap.release()
        cv2.destroyAllWindows()
if __name__ == "__main__":
    # camera_index selects the capture device; the laptop's built-in
    # front camera is device 0.
    CatchUsbVideo(window_name="face_detect", camera_index=0)
3 读取数据
新建read_data.py
from random import shuffle
import pandas as pd
import numpy as np
import pickle
import cv2
import os
from random import randint
class Reader(object):
    """Serve shuffled batches of the Facial Keypoints Detection training set.

    On first use the training CSV is converted into one JPEG per sample plus
    a pickled list of label dicts and a text file of keypoint column names.
    Later instances load those cached files instead of parsing the CSV.

    Args:
        data_dir: Directory holding ``training.csv``; the cache files
            (``names.txt``, ``data.pkl``, ``train_image/``) are written
            there too. Defaults to the tutorial's original location.
    """

    def __init__(self, data_dir='F:/try/face detect/Facial Keypoints Detection'):
        self.train_csv = data_dir + '/training.csv'
        self.test_csv = data_dir + '/test.csv'
        self.cursor = 0  # index of the next sample returned by generate()
        self.names_path = data_dir + '/names.txt'
        self.data_path = data_dir + '/data.pkl'
        self.train_image_path = data_dir + '/train_image'
        self.size = 96  # images are size x size pixels

        # Reuse the cache only when every artefact is present. Checking the
        # image directory alone crashed when data.pkl or names.txt was
        # missing (e.g. after an interrupted first run).
        cache_ready = (os.path.isdir(self.train_image_path)
                       and os.path.isfile(self.data_path)
                       and os.path.isfile(self.names_path))
        if cache_ready:
            # NOTE: pickle executes code on load; only open a data.pkl that
            # this class wrote itself.
            with open(self.data_path, 'rb') as f:
                self.data = pickle.load(f)
        else:
            os.makedirs(self.train_image_path, exist_ok=True)
            self.data = self.pre_process()

        print('There is {} in total data.'.format(len(self.data)))
        shuffle(self.data)
        with open(self.names_path, 'r') as f:
            self.names = f.read().splitlines()
        self.data_num = len(self.data)
        self.label_num = len(self.names)

    def pre_process(self):
        """Convert the training CSV into image files and cached labels.

        Rows with any missing value are dropped. Each remaining row's
        ``Image`` string is parsed into a ``size x size`` array and written
        as ``image_<row index>.jpg``; the other columns become the labels.

        Returns:
            A list with one dict per sample containing ``'image_path'`` and
            one entry per keypoint column. The same list is pickled to
            ``self.data_path`` and the column names to ``self.names_path``.
        """
        data = pd.read_csv(self.train_csv).dropna().to_dict()
        # The 'Image' column holds space-separated pixel values.
        for key, value in data['Image'].items():
            data['Image'][key] = np.fromstring(value, sep=' ')

        data_names = [name for name in data if name != 'Image']
        with open(self.names_path, 'w') as f:
            for name in data_names:
                f.write(name + '\n')

        labels = []
        for index, pixels in data['Image'].items():
            image = pixels.reshape((self.size, self.size))
            image_name = 'image_{}.jpg'.format(index)
            image_path = os.path.join(self.train_image_path, image_name)
            cv2.imwrite(image_path, image)
            label = {'image_path': image_path}
            for point_name in data_names:
                label[point_name] = data[point_name][index]
            labels.append(label)

        with open(self.data_path, 'wb') as f:
            pickle.dump(labels, f)
        return labels

    def random_flip(self, image, points):
        """Flip a sample along axis 0 with probability one half.

        When the flip is applied, ``image`` is reversed along its first
        axis and every odd-indexed entry of ``points`` is replaced by
        ``1 - value``. The caller's ``points`` array is never modified.

        Args:
            image: Array to flip along axis 0.
            points: Flat sequence of coordinates whose odd-indexed entries
                are mirrored.

        Returns:
            Tuple ``(image, points)``: flipped copies, or the inputs
            unchanged when no flip was drawn.
        """
        if randint(0, 1):
            image = np.flip(image, axis=0)
            points = np.array(points)  # copy so the caller's data stays intact
            points[1::2] = 1 - points[1::2]
        return image, points

    def generate(self, batch_size=1):
        """Return the next ``batch_size`` samples as network inputs.

        The read cursor advances through the dataset; when it reaches the
        end it wraps to 0 and the data is reshuffled.

        Args:
            batch_size: Number of samples to return.

        Returns:
            Tuple ``(images, points)``. ``images`` has shape
            ``(batch_size, size, size, 1)`` with 127.5 subtracted from each
            pixel. ``points`` holds the label values in ``self.names``
            order, divided by ``size``.
        """
        images = []
        points = []
        for _ in range(batch_size):
            sample = self.data[self.cursor]
            image = cv2.imread(sample['image_path'])
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            images.append(image)
            points.append([sample[key] for key in self.names])
            self.cursor += 1
            if self.cursor >= self.data_num:
                self.cursor = 0
                shuffle(self.data)
        images = np.array(images).reshape(
            (batch_size, self.size, self.size, 1))
        images = images - 127.5
        points = np.array(points) / self.size
        # NOTE: random_flip is deliberately not applied here. On the batched
        # array axis 0 is the batch axis, so it would reorder samples
        # instead of flipping each image.
        return images, points
if __name__ == "__main__":
    # Visual sanity check: show ten samples with their keypoints drawn.
    import matplotlib.pyplot as plt

    reader = Reader()
    for _ in range(10):
        image, point = reader.generate(1)
        image = np.squeeze(image)
        point = np.squeeze(point)
        # Undo the normalisation applied by generate(). ``np.int`` was
        # removed in NumPy 1.24, and a 64-bit integer image cannot be drawn
        # on by OpenCV, so explicit fixed-width dtypes are used instead.
        image = (image + 127.5).astype(np.uint8)
        point = (point * reader.size).astype(np.int32)
        result = image.copy()
        # Labels alternate x, y; tolist() yields plain Python ints.
        x_axis = point[::2].tolist()
        y_axis = point[1::2].tolist()
        color = (0, 0, 255)
        for x, y in zip(x_axis, y_axis):
            cv2.circle(result, (x, y), 1, color)
        plt.imshow(result)
        plt.show()
运行结束后可以看到数据目录下新增了三项:names.txt、data.pkl 和 train_image 文件夹(先前只有 .csv 文件)。
4 训练模型
新建network.py。这里只是为了熟悉一下流程,所以只把epoch设为20,原博主实际设为100。代码使用了 tf.contrib.slim,需要在 TensorFlow 1.x 环境下运行;如若报错缺少什么包,conda install xxx 即可。
import tensorflow as tf
from read_data import Reader
import os
import numpy as np
slim = tf.contrib.slim
class Net(object):
    """Convolutional regressor mapping a 96x96 grayscale face to keypoints.

    The graph is built in the constructor. ``train_net`` fits it on batches
    from ``Reader``; ``test_net`` runs a single image through it.

    Args:
        is_training: When true, a ``Reader`` is created so ``train_net``
            can fetch batches. Also forwarded to the layers in ``network``.
    """

    def __init__(self, is_training=True):
        self.is_training = is_training
        if self.is_training:
            self.reader = Reader()
        self.batch_size = 16   # samples per optimisation step
        self.lr = 2e-4         # Adam learning rate
        self.wd = 5e-3         # L2 regulariser scale for the conv layers
        self.epoches = 20
        self.batches = 256     # optimisation steps per epoch
        self.size = 96
        self.label_num = 30
        self.x = tf.placeholder(tf.float32, [None, self.size, self.size, 1])
        self.y = tf.placeholder(tf.float32, [None, self.label_num])
        self.y_hat = self.network(self.x)
        self.model_path = './model'
        self.ckpt_path = os.path.join(self.model_path, 'model.ckpt')
        self.saver = tf.train.Saver()

    def loss_layer(self, y, y_hat):
        """Return the sum of squared differences between ``y`` and ``y_hat``."""
        loss = tf.reduce_sum(tf.square(y - y_hat))
        return loss

    def network(self, inputs):
        """Build the convolutional stack and return the output tensor.

        Args:
            inputs: Image tensor; the constructor passes the
                ``[None, size, size, 1]`` placeholder.

        Returns:
            Tensor with ``label_num`` values per sample.
        """
        # NOTE(review): ``trainable=`` on batch_normalization only controls
        # whether the layer's variables are trainable. The ``training``
        # flag is left at its default, so batch statistics are presumably
        # never used -- confirm whether ``training=self.is_training`` (plus
        # running UPDATE_OPS) was intended. Left unchanged to stay
        # compatible with existing checkpoints.
        # NOTE(review): the L2 regulariser is registered, but train_net
        # never adds the regularisation losses to the objective.
        with tf.variable_scope('net'):
            with slim.arg_scope([slim.conv2d],
                                activation_fn=tf.nn.relu,
                                weights_regularizer=slim.l2_regularizer(self.wd)):
                # Block init
                net = slim.conv2d(inputs, 1024, [3, 3],
                                  2, scope='conv_init', padding='SAME')

                # Block 1
                net = slim.repeat(net, 2, slim.conv2d,
                                  64, [3, 3], scope='conv1', padding='SAME')
                net = slim.max_pool2d(
                    net, [2, 2], scope='pool1', padding='SAME')
                net = tf.layers.batch_normalization(
                    net, trainable=self.is_training, name='BN_block1')

                # Block 2
                net = slim.repeat(net, 2, slim.conv2d,
                                  128, [3, 3], scope='conv2')
                net = slim.max_pool2d(
                    net, [2, 2], scope='pool2', padding='SAME')
                net = tf.layers.batch_normalization(
                    net, trainable=self.is_training, name='BN_block2')

                # Block 3
                net = slim.repeat(net, 3, slim.conv2d,
                                  256, [3, 3], scope='conv3')
                net = slim.max_pool2d(
                    net, [2, 2], scope='pool3', padding='SAME')
                net = tf.layers.batch_normalization(
                    net, trainable=self.is_training, name='BN_block3')

                # Block 4
                net = slim.repeat(net, 3, slim.conv2d,
                                  512, [3, 3], scope='conv4')
                net = slim.max_pool2d(
                    net, [2, 2], scope='pool4', padding='SAME')
                net = tf.layers.batch_normalization(
                    net, trainable=self.is_training, name='BN_block4')

                # Block 5
                net = slim.repeat(net, 3, slim.conv2d,
                                  512, [3, 3], scope='conv5')
                net = tf.layers.batch_normalization(
                    net, trainable=self.is_training, name='BN_block5')

                # Block 6
                net = slim.conv2d(net, 1024, [3, 3],
                                  2, scope='conv6')
                net = tf.layers.batch_normalization(
                    net, trainable=self.is_training, name='BN_block6')

                net = tf.layers.flatten(net)
                logits = tf.layers.dense(net, self.label_num)
                if self.is_training:
                    # NOTE(review): called without ``training=True``, so
                    # this is presumably a pass-through -- confirm intent.
                    logits = tf.layers.dropout(logits)
                return logits

    def train_net(self):
        """Train the network and write checkpoints and per-epoch losses.

        Resumes from the latest checkpoint in ``self.model_path`` if one
        exists. Each epoch runs ``self.batches`` optimisation steps of
        ``self.batch_size`` samples, prints the mean loss and appends it to
        ``./losses.txt``.
        """
        if not os.path.exists(self.model_path):
            os.makedirs(self.model_path)
        self.loss = self.loss_layer(self.y, self.y_hat)
        self.optimizer = tf.compat.v1.train.AdamOptimizer(self.lr)
        self.train_step = self.optimizer.minimize(self.loss)
        with tf.Session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            ckpt = tf.train.get_checkpoint_state(self.model_path)
            if ckpt and ckpt.model_checkpoint_path:
                # A model was saved earlier: continue training from it.
                self.saver.restore(sess, ckpt.model_checkpoint_path)
                print('Model Reload Successfully!')
            for epoch in range(self.epoches):
                loss_list = []
                # Iterate over the configured steps per epoch. The loop
                # previously used ``self.batch_size`` (16), which left
                # ``self.batches`` (256) unused.
                for batch in range(self.batches):
                    images, labels = self.reader.generate(self.batch_size)
                    feed_dict = {
                        self.x: images,
                        self.y: labels
                    }
                    loss_value, _ = sess.run(
                        [self.loss, self.train_step], feed_dict)
                    loss_list.append(loss_value)
                loss = np.mean(np.array(loss_list))
                print('epoch:{} loss:{}'.format(epoch, loss))
                with open('./losses.txt', 'a') as f:
                    f.write(str(loss)+'\n')
                # Checkpoint after every epoch so an interrupted run can
                # resume through the restore branch above.
                self.saver.save(sess, self.ckpt_path)

    def test_net(self, image, sess):
        """Predict keypoints for one image.

        Args:
            image: Array reshapeable to ``(1, size, size, 1)``.
            sess: TensorFlow session holding the model variables.

        Returns:
            Integer array of predictions scaled by ``size``, with
            singleton dimensions removed.
        """
        image = image.reshape((1, self.size, self.size, 1)) - 127.5
        points = sess.run(self.y_hat, feed_dict={self.x: image})
        # ``np.int`` was removed in NumPy 1.24; builtin ``int`` is the dtype
        # it used to alias.
        points = (points * self.size).astype(int)
        return np.squeeze(points)
if __name__ == '__main__':
    # Train the network, then plot the loss history stored in losses.txt.
    import matplotlib.pyplot as plt

    net = Net()
    net.train_net()
    with open('./losses.txt', 'r') as f:
        # Parse with float() rather than eval(): file contents are never
        # executed, and values such as 'nan' or 'inf' still load. Blank
        # lines are skipped.
        losses = [float(v) for v in f.read().splitlines() if v.strip()]
    plt.plot(losses)
    plt.title('loss')
    plt.show()
运行代码过程中可以看到
训练过程中会生成相应文件:model 文件夹中保存了训练的模型,losses.txt 中保存了每个 epoch 的 loss 值
生成的loss曲线如图
5 笔记本摄像头读取视频进行识别
新建keypoint.py
import cv2
import tensorflow as tf
from network import Net
import numpy as np
class FaceDetertor(object):
    """Detect faces on a camera stream and overlay predicted keypoints."""

    def __init__(self):
        self.model_path = 'F:/try/face detect/model'  # checkpoint directory
        self.net = Net(is_training=False)
        self.size = 96  # side length of the network input

    def ad_threshold(self, img):
        """Binarize ``img`` with OpenCV's Gaussian adaptive threshold."""
        th2 = cv2.adaptiveThreshold(img, 255,
                                    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY, 19, 4)
        return th2

    def CatchUsbVideo(self, window_name, camera_index):
        """Run the live preview: detect faces and draw their keypoints.

        Each frame is converted to grayscale, adaptively thresholded and
        passed to a Haar cascade. For every detected face a padded crop is
        resized to ``size x size``, fed to the network, and the predicted
        points are drawn on the frame. The loop ends when ``q`` is pressed
        or the window is closed.

        Args:
            window_name: Title of the OpenCV preview window.
            camera_index: Index of the capture device to open.

        Raises:
            Exception: If a frame cannot be read from the camera.
        """
        cv2.namedWindow(window_name)
        cap = cv2.VideoCapture(camera_index)
        # Pre-trained frontal-face Haar cascade shipped with OpenCV (found
        # in the ``cv2/data`` folder of the installation).
        classfier = cv2.CascadeClassifier('F:/try/face detect//haarcascade_frontalface_alt2.xml')
        color = (0, 255, 0)  # colour of the face rectangle
        font = cv2.FONT_HERSHEY_SIMPLEX
        pad = 10  # extra pixels kept around each detected face
        try:
            with tf.Session() as sess:
                sess.run(tf.compat.v1.global_variables_initializer())
                ckpt = tf.train.get_checkpoint_state(self.model_path)
                if ckpt and ckpt.model_checkpoint_path:
                    # Load the trained weights for inference.
                    self.net.saver.restore(sess, ckpt.model_checkpoint_path)
                    print('Model Reload Successfully!')
                while cap.isOpened():
                    catch, frame = cap.read()
                    if not catch:
                        raise Exception('Check if the camera if on.')
                    frame_h, frame_w = frame.shape[:2]
                    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    gray = self.ad_threshold(gray)
                    # scaleFactor: image pyramid step between scales.
                    # minNeighbors: overlapping hits required to accept a
                    #   face; raising it suppresses false positives.
                    # minSize: smallest face size that is reported.
                    faceRects = classfier.detectMultiScale(
                        gray, scaleFactor=1.2, minNeighbors=2, minSize=(32, 32))
                    for x, y, w, h in faceRects:
                        # Clamp the padded crop to the frame. A negative
                        # start index wraps around in NumPy and gave an
                        # empty or wrong crop for faces near the border.
                        x0 = max(x - pad, 0)
                        y0 = max(y - pad, 0)
                        x1 = min(x + w + pad, frame_w)
                        y1 = min(y + h + pad, frame_h)
                        image = frame[y0:y1, x0:x1]
                        # Predict before drawing: the crop is a view of
                        # ``frame``, so drawing first put the rectangle and
                        # label into the network input.
                        image_x = cv2.resize(cv2.cvtColor(
                            image, cv2.COLOR_BGR2GRAY), (self.size, self.size))
                        points = self.net.test_net(image_x, sess)
                        # Map from the size x size input back to the frame
                        # using the crop's own origin and extent.
                        points_x = points[::2] / self.size * (x1 - x0) + x0
                        points_y = points[1::2] / self.size * (y1 - y0) + y0
                        # ``np.int`` was removed in NumPy 1.24; tolist()
                        # yields plain Python ints for OpenCV.
                        points_x = points_x.astype(int).tolist()
                        points_y = points_y.astype(int).tolist()
                        cv2.rectangle(frame, (x-5, y-5), (x+w+5, y+h+5),
                                      color, 2)
                        cv2.putText(frame, 'face', (x + 30, y + 30),
                                    font, 1, (255, 0, 255), 4)
                        for x_, y_ in zip(points_x, points_y):
                            cv2.circle(frame, (x_, y_), 2, (0, 0, 255), -1)
                    cv2.imshow(window_name, frame)
                    c = cv2.waitKey(10)
                    if c & 0xFF == ord('q'):
                        break  # 'q' pressed
                    if cv2.getWindowProperty(window_name, cv2.WND_PROP_AUTOSIZE) < 1:
                        break  # window closed with the X button
        finally:
            # Always free the camera and close the window, including when
            # the read error above is raised.
            cap.release()
            cv2.destroyAllWindows()
if __name__ == "__main__":
    # camera_index selects the capture device; the laptop's built-in
    # front camera is device 0.
    detector = FaceDetertor()
    detector.CatchUsbVideo(window_name="face_detect", camera_index=0)
成功!!