本次我们将使用SVM+HOG来进行行人识别。行人检测,就是将一张图片中的行人检测出来,并输出bounding box级别的结果。而如果将各个行人之间的轨迹关联起来,就变成了行人跟踪。而行人检索则是把一段视频中的某个感兴趣的人检索出来。
在2005年CVPR上,来自法国的研究人员Navneet Dalal 和Bill Triggs提出利用Hog进行特征提取,利用线性SVM作为分类器,从而实现行人检测。
HOG+SVM行人检测
首先计算样本图像的HOG描述子,组成一个特征向量矩阵,对应的要有一个指定每个特征向量的类别的类标向量,输入SVM中进行训练,训练好的SVM分类器保存为XML文件,
然后根据其中的支持向量和参数生成OpenCV中的HOG描述子可用的检测子参数,再调用OpenCV中的多尺度检测函数进行行人检测。
现在利用HOG特征来进行行人检测,既然要有了特征,现在其实要有一个方法来判断是否一个图片的某一部分是行人,SVM是一个很好的机器学习方法,可以用来分类,结合HOG特征就可以用来检测图片中的行人。OpenCV中集成了一个方法,getDefaultPeopleDetector等可以直接得到一个SVM的分类器,这个分类器是OpenCV自带的已经训练好的,可以直接拿来使用。下面可以看一下使用它的代码。
首先我们进行图片检测:
view plaincopy to clipboardprint?
# import the necessary packages
from __future__ import print_function
from imutils.object_detection import non_max_suppression
from imutils import paths
import numpy as np
import argparse
import imutils
import cv2
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--images", required=True, help="path to images directory")
args = vars(ap.parse_args())
# initialize the HOG descriptor/person detector
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
# loop over the image paths
for imagePath in paths.list_images(args["images"]):
# load the image and resize it to (1) reduce detection time
# and (2) improve detection accuracy
image = cv2.imread(imagePath)
image = imutils.resize(image, width=min(400, image.shape[1]))
orig = image.copy()
# detect people in the image
(rects, weights) = hog.detectMultiScale(image, winStride=(1, 1),
padding=(8, 8), scale=1.05)
# draw the original bounding boxes
for (x, y, w, h) in rects:
cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)
# apply non-maxima suppression to the bounding boxes using a
# fairly large overlap threshold to try to maintain overlapping
# boxes that are still people
rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
pick = non_max_suppression(rects, probs=None, overlapThresh=0.45)
# draw the final bounding boxes
for (xA, yA, xB, yB) in pick:
cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)
# show some information on the number of bounding boxes
filename = imagePath[imagePath.rfind("/") + 1:]
print("[INFO] {}: {} original boxes, {} after suppression".format(
filename, len(rects), len(pick)))
# show the output images
cv2.imshow("Before NMS", orig)
cv2.moveWindow('Before NMS', x=0, y=0)
cv2.imshow("After NMS", image)
cv2.moveWindow('After NMS', x=orig.shape[1], y=0)
k=cv2.waitKey(0)
if k==ord('q'):
break
我们加入了非极大值抑制(前面已经讲过),用来去除一些其他的噪声信息。
实验结果:
右边为使用了极大值抑制之后的结果。
现在我们可以进行视频检测,来看代码:
view plaincopy to clipboardprint?
# import the necessary packages
from __future__ import print_function
from imutils.object_detection import non_max_suppression
from imutils import paths
import numpy as np
import argparse
import imutils
import cv2
import time
# construct the argument parse and parse the arguments
# ap = argparse.ArgumentParser()
# ap.add_argument("-i", "--images", required=True, help="path to images directory")
# args = vars(ap.parse_args())
# initialize the HOG descriptor/person detector
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
#
cap = cv2.VideoCapture('videos/
礼让斑马线!齐齐哈尔城市文明的伤!
.mp4')
# cap = cv2.VideoCapture('../../data/TownCentreXVID.mp4')
fps = cap.get(cv2.CAP_PROP_FPS) # 25.0
print("Frames per second using video.get(cv2.CAP_PROP_FPS) : {0}".format(fps))
num_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
print('
共有
', num_frames, '
帧
') #
共有
2499.0
帧
frame_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
frame_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
print('
高:
', frame_height, '
宽:
', frame_width) #
高:
480.0
宽:
640.0
# exit(0)
#
跳过多少帧
skips = 20
# loop over the image paths
# for imagePath in paths.list_images(args["images"]):
while cap.isOpened():
# load the image and resize it to (1) reduce detection time
# and (2) improve detection accuracy
# image = cv2.imread(imagePath)
ret, frame = cap.read()
image = frame
#
current = cap.get(cv2.CAP_PROP_POS_FRAMES)
if current % skips != 0:
continue
image = imutils.resize(image, width=min(400, image.shape[1]))
orig = image.copy()
# detect people in the image
(rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
padding=(8, 8), scale=1.05)
# draw the original bounding boxes
for (x, y, w, h) in rects:
cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)
# apply non-maxima suppression to the bounding boxes using a
# fairly large overlap threshold to try to maintain overlapping
# boxes that are still people
rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
# draw the final bounding boxes
for (xA, yA, xB, yB) in pick:
cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)
# show some information on the number of bounding boxes
# filename = imagePath[imagePath.rfind("/") + 1:]
# print("[INFO] {}: {} original boxes, {} after suppression".format(
print("[INFO] {} original boxes, {} after suppression".format(len(rects), len(pick)))
# show the output images
cv2.imshow("Before NMS", orig)
cv2.imshow("After NMS", image)
cv2.moveWindow("After NMS", y=0, x=400)
key = cv2.waitKey(delay=50)
if key == ord("q"):
break
# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()
这是视频中的一部分截图,我们可以看到结果。