行人目标检测视频数据集行人检测算法

转载

mob64ca13ff28f1 2024-08-15 16:27:34

文章标签 行人目标检测视频数据集 python opencv 计算机视觉机器学习 文章分类 计算机视觉人工智能

本次我们将使用SVM+HOG来进行行人识别。行人检测，就是将一张图片中的行人检测出来，并输出bounding box级别的结果。而如果将各个行人之间的轨迹关联起来，就变成了行人跟踪。而行人检索则是把一段视频中的某个感兴趣的人检索出来。

在2005年的CVPR上，来自法国的研究人员Navneet Dalal和Bill Triggs提出利用HOG（Histogram of Oriented Gradients，方向梯度直方图）进行特征提取，并利用线性SVM作为分类器，从而实现行人检测。

HOG+SVM行人检测

首先计算样本图像的HOG描述子，组成一个特征向量矩阵，对应的要有一个指定每个特征向量的类别的类标向量，输入SVM中进行训练，训练好的SVM分类器保存为XML文件，
然后根据其中的支持向量和参数生成OpenCV中的HOG描述子可用的检测子参数，再调用OpenCV中的多尺度检测函数进行行人检测。

现在利用HOG特征来进行行人检测。有了特征之后，还需要一种方法来判断图片中的某一部分是否是行人。SVM是一种很好的机器学习分类方法，结合HOG特征就可以用来检测图片中的行人。OpenCV中集成了getDefaultPeopleDetector等方法，可以直接得到一个SVM分类器；这个分类器是OpenCV自带的、已经训练好的，可以直接拿来使用。下面来看一下使用它的代码。

首先我们进行图片检测：

view plaincopy to clipboardprint?
# import the necessary packages
from __future__ import print_function
from imutils.object_detection import non_max_suppression
from imutils import paths
import numpy as np
import argparse
import imutils
import os
import cv2

# Detect pedestrians in every image of a directory with OpenCV's pre-trained
# HOG + linear SVM people detector, then show the raw detections side by side
# with the detections that survive non-maxima suppression (NMS).
# Press any key to advance to the next image, or "q" to quit.

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--images", required=True, help="path to images directory")
args = vars(ap.parse_args())

# initialize the HOG descriptor/person detector
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

# loop over the image paths
for imagePath in paths.list_images(args["images"]):
    # cv2.imread returns None (it does not raise) when the file is missing,
    # corrupt or in an unsupported format; skip it instead of crashing on
    # image.shape below
    image = cv2.imread(imagePath)
    if image is None:
        print("[WARN] could not read {}, skipping".format(imagePath))
        continue

    # resize to at most 400 px wide to (1) reduce detection time and
    # (2) improve detection accuracy
    image = imutils.resize(image, width=min(400, image.shape[1]))
    orig = image.copy()

    # detect people in the image; a 1-pixel window stride evaluates every
    # window position, which is thorough but slow
    (rects, weights) = hog.detectMultiScale(image, winStride=(1, 1),
                                            padding=(8, 8), scale=1.05)

    # draw the original (pre-NMS) bounding boxes in red on the copy
    for (x, y, w, h) in rects:
        cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people; the boxes are converted from
    # (x, y, w, h) to (x1, y1, x2, y2) corner form first
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.45)

    # draw the final (post-NMS) bounding boxes in green
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)

    # show some information on the number of bounding boxes;
    # os.path.basename handles the path separator of the current platform
    filename = os.path.basename(imagePath)
    print("[INFO] {}: {} original boxes, {} after suppression".format(
        filename, len(rects), len(pick)))

    # show the output images next to each other
    cv2.imshow("Before NMS", orig)
    cv2.moveWindow('Before NMS', x=0, y=0)
    cv2.imshow("After NMS", image)
    cv2.moveWindow('After NMS', x=orig.shape[1], y=0)

    # wait for a key press; "q" stops the loop early
    k = cv2.waitKey(0)
    if k == ord('q'):
        break

# close the preview windows once all images were shown or the user quit
cv2.destroyAllWindows()

我们加入了非极大值抑制（前面已经讲过），用来去除同一目标上相互重叠的冗余检测框等噪声信息。

实验结果：

行人目标检测视频数据集行人检测算法_机器学习

行人目标检测视频数据集行人检测算法_python_02

行人目标检测视频数据集行人检测算法_行人目标检测视频数据集_03

行人目标检测视频数据集行人检测算法_机器学习_04

右边为使用了非极大值抑制之后的结果。

现在我们可以进行视频检测，来看代码：

view plaincopy to clipboardprint?
# import the necessary packages
from __future__ import print_function
from imutils.object_detection import non_max_suppression
from imutils import paths
import numpy as np
import argparse
import imutils
import cv2
import time

# Run OpenCV's pre-trained HOG + linear SVM people detector on a video file.
# Only a subset of the frames (controlled by `skips`) is processed; for each
# processed frame the raw detections and the detections that survive
# non-maxima suppression (NMS) are shown in two windows. Press "q" to quit.

# initialize the HOG descriptor/person detector
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

# open the input video
cap = cv2.VideoCapture('videos/礼让斑马线！齐齐哈尔城市文明的伤！.mp4')
# alternative sample clip:
# cap = cv2.VideoCapture('../../data/TownCentreXVID.mp4')

# report basic properties of the stream
fps = cap.get(cv2.CAP_PROP_FPS)  # e.g. 25.0 for the sample clip
print("Frames per second using video.get(cv2.CAP_PROP_FPS) : {0}".format(fps))
num_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
print('共有', num_frames, '帧')  # e.g. 2499.0 frames for the sample clip

frame_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
frame_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
print('高：', frame_height, '宽：', frame_width)  # e.g. height 480.0, width 640.0

# frame skipping interval: a frame is processed only when the capture
# position reported after reading it is a multiple of this value
skips = 20

# loop over the video frames
while cap.isOpened():
    ret, frame = cap.read()
    # cap.read() returns ret=False (and frame=None) at end of stream or on a
    # decode error; without this check the loop would spin forever on the
    # skip branch below or crash on image.shape
    if not ret:
        break
    image = frame

    # skip frames that are not on the processing interval
    current = cap.get(cv2.CAP_PROP_POS_FRAMES)
    if current % skips != 0:
        continue

    # resize to at most 400 px wide to (1) reduce detection time and
    # (2) improve detection accuracy
    image = imutils.resize(image, width=min(400, image.shape[1]))
    orig = image.copy()

    # detect people in the frame
    (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
                                            padding=(8, 8), scale=1.05)

    # draw the original (pre-NMS) bounding boxes in red on the copy
    for (x, y, w, h) in rects:
        cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people; the boxes are converted from
    # (x, y, w, h) to (x1, y1, x2, y2) corner form first
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # draw the final (post-NMS) bounding boxes in green
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)

    # show some information on the number of bounding boxes
    print("[INFO] {} original boxes, {} after suppression".format(len(rects), len(pick)))

    # show the output images; the second window is moved 400 px to the right
    # so that it does not cover the first one
    cv2.imshow("Before NMS", orig)
    cv2.imshow("After NMS", image)
    cv2.moveWindow("After NMS", y=0, x=400)

    # give the GUI 50 ms to refresh; "q" stops playback
    key = cv2.waitKey(delay=50)
    if key == ord("q"):
        break

# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()

行人目标检测视频数据集行人检测算法_python_05