一、图片物体识别

(注:使用以下代码时,路径部分需要根据情况自行修改)

# -*- coding: utf-8 -*-
import os
import sys
import cv2
import numpy as np
import shutil
import tensorflow as tf
import time
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

class TOD(object):
    """Batch object detector for a folder of JPEG images.

    A frozen TensorFlow inference graph and its label map are loaded once at
    construction time.  ``detect`` then runs every JPEG of a directory through
    the model, draws the detections onto the image and sorts the annotated
    images into a ``have`` and a ``no`` result folder.
    """

    # Confidence threshold used both for counting detections and as
    # ``min_score_thresh`` when drawing boxes.
    SCORE_THRESHOLD = 0.6
    # At most this many leading entries of the score list are inspected per image.
    MAX_COUNTED = 10
    # Root folder that receives the annotated output images.
    RESULT_DIR = 'test_result_jisuan7045'

    def __init__(self):
        """Load the model and the label map from their hard-coded paths."""
        # Frozen model file: contains the graph structure and the weights.
        self.PATH_TO_CKPT = 'export/forzon_inference_graph7045/frozen_inference_graph.pb'
        # label_map.pbtxt stores the mapping from class index to class name.
        self.PATH_TO_LABELS = 'label_map.pbtxt'
        # Number of classes the model can detect.
        self.NUM_CLASSES = 3
        self.detection_graph = self._load_model()
        self.category_index = self._load_label_map()

    def _load_model(self):
        """Read the frozen ``.pb`` file and import it into a fresh ``tf.Graph``.

        Returns:
            The ``tf.Graph`` holding the imported detection model.
        """
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(self.PATH_TO_CKPT, 'rb') as fid:
                # Deserialize the file content into the GraphDef.
                od_graph_def.ParseFromString(fid.read())
            # Import the GraphDef into the current default graph (loads the model).
            tf.import_graph_def(od_graph_def, name='')
        return detection_graph

    def _load_label_map(self):
        """Build the category index used to label detections.

        Returns:
            The object returned by ``label_map_util.create_category_index``.
        """
        label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True)
        return label_map_util.create_category_index(categories)

    @staticmethod
    def _reset_dir(path):
        """Remove ``path`` if it exists and recreate it as an empty directory."""
        if os.path.exists(path):
            # rmtree already removes contained files; no manual loop is needed.
            shutil.rmtree(path)
        os.makedirs(path)

    @staticmethod
    def _count_confident(scores, threshold, limit):
        """Count scores above ``threshold`` among the first ``limit`` of ``scores[0]``.

        Slicing (instead of indexing a fixed range) keeps this safe when fewer
        than ``limit`` scores are available.
        """
        return sum(1 for score in scores[0][:limit] if score > threshold)

    def detect(self, img_path):
        """Run detection on every ``.jpg``/``.JPG`` file directly inside ``img_path``.

        Each annotated image is written to ``<RESULT_DIR>/have`` when at least
        one detection scores above ``SCORE_THRESHOLD`` and to
        ``<RESULT_DIR>/no`` otherwise.  Both folders are wiped and recreated
        on every call.  The total detection time is printed, followed by the
        average time per processed image when at least one image was processed.

        Args:
            img_path: Directory containing the images to test.

        Returns:
            dict mapping each processed file name to its detection count.
        """
        counts = {}

        path_have = os.path.join(self.RESULT_DIR, 'have')
        path_no = os.path.join(self.RESULT_DIR, 'no')
        self._reset_dir(path_have)
        self._reset_dir(path_no)

        graph = self.detection_graph
        with graph.as_default():
            with tf.Session(graph=graph) as sess:
                # Input placeholder and output tensors of the model.
                image_tensor = graph.get_tensor_by_name('image_tensor:0')
                fetches = [
                    graph.get_tensor_by_name('detection_boxes:0'),
                    graph.get_tensor_by_name('detection_scores:0'),
                    graph.get_tensor_by_name('detection_classes:0'),
                    graph.get_tensor_by_name('num_detections:0'),
                ]

                total_time = 0.0
                processed = 0
                for name in os.listdir(img_path):
                    if not name.endswith(('.jpg', '.JPG')):
                        continue
                    start = time.time()
                    image = cv2.imread(os.path.join(img_path, name))
                    if image is None:
                        # cv2.imread returns None for unreadable files; skip them
                        # instead of feeding None to the session.
                        continue
                    # NOTE(review): cv2.imread yields BGR channel order; graphs
                    # exported by the TF Object Detection API are normally fed
                    # RGB -- confirm whether a BGR->RGB conversion is needed.
                    # The network input must have shape [1, None, None, 3].
                    image_np_expanded = np.expand_dims(image, axis=0)

                    # Actual detection.
                    boxes, scores, classes, _ = sess.run(
                        fetches, feed_dict={image_tensor: image_np_expanded})

                    num = self._count_confident(
                        scores, self.SCORE_THRESHOLD, self.MAX_COUNTED)
                    counts[name] = num

                    # Draw the detections onto the image (in place).
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image,
                        np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        self.category_index,
                        min_score_thresh=self.SCORE_THRESHOLD,
                        max_boxes_to_draw=20,
                        use_normalized_coordinates=True,
                        line_thickness=8)

                    target_dir = path_have if num > 0 else path_no
                    cv2.imwrite(os.path.join(target_dir, name), image)

                    total_time += time.time() - start
                    processed += 1

                print("检测总时间:"+str(total_time))
                if processed:
                    # Average over the images actually processed, not over
                    # whatever happens to be in a hard-coded folder.
                    print("每张检测时间:"+str(total_time/processed))
        return counts

if __name__ == '__main__':
    # Folder holding the images to test; adjust to your own layout.
    img_path = 'test'

    # Build the detector (loads model and label map), then process the folder.
    detector = TOD()
    detector.detect(img_path)

    print('检测完成!')

二、视频物体识别

1、使用Anaconda安装opencv,安装完成后在Python解释器中执行以下语句:

import cv2

不报错则安装成功。

2、视频识别代码 :

(注1:以下代码中方法1和方法2,使用一个即可,将另一个注释。我这里使用的是方法1,注释了方法2)

(注2:使用以下代码时,路径部分需要根据情况自行修改)

import os
import cv2
import time
import argparse
import multiprocessing
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

import imageio
# The statement below downloads a piece of software (presumably the ffmpeg
# binary) the first time it runs; comment it out for subsequent runs.
# NOTE(review): newer imageio releases reportedly dropped this helper in favour
# of the separate imageio-ffmpeg package -- confirm against the installed version.
imageio.plugins.ffmpeg.download()
from moviepy.editor import VideoFileClip

class TOD(object):
    """Per-frame object detector for video clips, backed by a frozen TF graph.

    ``detect_videos`` takes one frame, draws the detections onto it and returns
    it, so it can be passed directly to ``clip.fl_image``.  A single
    ``tf.Session`` is created on the first frame and reused for all later
    frames; call ``close`` to release it.
    """

    def __init__(self):
        """Load the frozen graph and the label map from their hard-coded paths."""
        # Path to the frozen detection graph (.pb): the model used for detection.
        self.PATH_TO_CKPT = 'ssd_mobilenet_v1_coco_2018_01_28/frozen_inference_graph.pb'
        # Path to the .pbtxt label map that provides the label for each box.
        # (For a custom model, point this at e.g. 'label_map.pbtxt'.)
        self.PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
        # Number of classes.
        self.NUM_CLASSES = 90

        self.detection_graph = self._load_model()
        self.category_index = self._load_label_map()

        # Look the tensors up once instead of on every frame.
        graph = self.detection_graph
        self._image_tensor = graph.get_tensor_by_name('image_tensor:0')
        self._fetches = [
            # Boxes: parts of the image where an object was detected.
            graph.get_tensor_by_name('detection_boxes:0'),
            # Scores: confidence for each detected object.
            graph.get_tensor_by_name('detection_scores:0'),
            graph.get_tensor_by_name('detection_classes:0'),
            graph.get_tensor_by_name('num_detections:0'),
        ]
        # Created lazily on the first frame; opening a session per frame is
        # needlessly expensive.
        self._sess = None

    def _load_model(self):
        """Read the frozen ``.pb`` file and import it into a fresh ``tf.Graph``.

        Returns:
            The ``tf.Graph`` holding the imported detection model.
        """
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(self.PATH_TO_CKPT, 'rb') as fid:
                od_graph_def.ParseFromString(fid.read())
            tf.import_graph_def(od_graph_def, name='')
        return detection_graph

    def _load_label_map(self):
        """Build the category index used to label detections.

        Returns:
            The object returned by ``label_map_util.create_category_index``.
        """
        label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True)
        return label_map_util.create_category_index(categories)

    def close(self):
        """Close the cached TensorFlow session, if one was created."""
        if self._sess is not None:
            self._sess.close()
            self._sess = None

    # Method 1: the whole detection lives in a single function.
    def detect_videos(self, image_np):
        """Detect objects in one frame and draw them onto it.

        Args:
            image_np: The frame as an image array; it is annotated in place.

        Returns:
            The same ``image_np`` array with boxes and labels drawn on it.
        """
        if self._sess is None:
            self._sess = tf.Session(graph=self.detection_graph)

        # The model expects images of shape [1, None, None, 3].
        image_np_expanded = np.expand_dims(image_np, axis=0)

        # Actual detection.
        boxes, scores, classes, _ = self._sess.run(
            self._fetches, feed_dict={self._image_tensor: image_np_expanded})

        # Visualization of the detection results.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            self.category_index,
            use_normalized_coordinates=True,
            line_thickness=8)
        return image_np

    # Method 2 (alternative to method 1): the detection is split across two
    # functions.  To use it, comment out method 1 and enable the code below,
    # then pass `process_image` to `clip.fl_image`.  Note that it opens a new
    # session for every frame.
    #
    # def detect_videos(self, image_np, sess, detection_graph):
    #     # The model expects images of shape [1, None, None, 3].
    #     image_np_expanded = np.expand_dims(image_np, axis=0)
    #     image_tensor = self.detection_graph.get_tensor_by_name('image_tensor:0')
    #     boxes = self.detection_graph.get_tensor_by_name('detection_boxes:0')
    #     scores = self.detection_graph.get_tensor_by_name('detection_scores:0')
    #     classes = self.detection_graph.get_tensor_by_name('detection_classes:0')
    #     num_detections = self.detection_graph.get_tensor_by_name('num_detections:0')
    #
    #     # Actual detection.
    #     (boxes, scores, classes, num_detections) = sess.run(
    #         [boxes, scores, classes, num_detections],
    #         feed_dict={image_tensor: image_np_expanded})
    #
    #     # Visualization of the detection results.
    #     vis_util.visualize_boxes_and_labels_on_image_array(
    #         image_np,
    #         np.squeeze(boxes),
    #         np.squeeze(classes).astype(np.int32),
    #         np.squeeze(scores),
    #         self.category_index,
    #         use_normalized_coordinates=True,
    #         line_thickness=8)
    #     return image_np
    #
    # def process_image(self, image):
    #     # The returned frame must be a 3-channel colour image.
    #     with self.detection_graph.as_default():
    #         with tf.Session(graph=self.detection_graph) as sess:
    #             # The helper above is named detect_videos (the original text
    #             # called a non-existent detect_objects).
    #             image_process = self.detect_videos(image, sess, self.detection_graph)
    #             return image_process

if __name__ == '__main__':
    detector = TOD()
    source_path = "video1.mp4"
    target_path = 'video1_out1.mp4'

    # Take the first three seconds of the input and run every frame through
    # the detector.
    annotated = VideoFileClip(source_path).subclip(0, 3).fl_image(detector.detect_videos)

    # Time only the rendering of the output file.
    started = time.time()
    annotated.write_videofile(target_path, audio=False)
    print(time.time() - started)

三、实时目标检测(摄像头)

(注:使用以下代码时,路径部分需要根据情况自行修改)

import numpy as np
import os
import six.moves.urllib as urllib
import sys
import matplotlib as plt
import tarfile
import tensorflow as tf
import zipfile
import cv2
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from utils import label_map_util
from utils import visualization_utils as vis_util
from object_detection.utils import ops as utils_ops
 
# Directory name of the pre-trained model (expected to exist locally; nothing
# is downloaded by this script).
MODEL_NAME = 'ssd_mobilenet_v1_coco_2018_01_28'

# Path to the frozen detection graph: the actual model used for detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

# Label map providing the display label for each detected class.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')

NUM_CLASSES = 90

# Load the frozen graph into its own tf.Graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

cap = cv2.VideoCapture(0)  # 0 selects the first attached webcam
filename = "a.avi"  # output file
codec = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')  # fourcc = four character code
framerate = 30
# Use the frame size reported by the camera: VideoWriter expects every frame
# to have exactly the size it was opened with.  Fall back to 640x480 when the
# capture does not report a size.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
resolution = (frame_width, frame_height) if frame_width > 0 and frame_height > 0 else (640, 480)

VideoFileOutput = cv2.VideoWriter(filename, codec, framerate, resolution)

try:
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            # Input and output tensors of the detection graph; looked up once
            # rather than on every frame.
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Boxes: parts of the image where an object was detected.
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Scores: confidence for each object, shown next to the class label.
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')

            while True:
                ret, image_np = cap.read()
                if not ret:
                    # No frame available (camera missing or stream ended).
                    break
                # NOTE(review): OpenCV frames are BGR; graphs exported by the
                # TF Object Detection API are normally fed RGB -- confirm
                # whether a BGR->RGB conversion is needed for the model input.
                # The model expects images of shape [1, None, None, 3].
                image_np_expanded = np.expand_dims(image_np, axis=0)
                # Actual detection.
                (boxes, scores, classes, num) = sess.run(
                    [detection_boxes, detection_scores, detection_classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                # Draw the detection results onto the frame (in place).
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)

                VideoFileOutput.write(image_np)
                cv2.imshow("realtime_detection", image_np)
                # Press 'q' to stop.
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
finally:
    # Always release the camera, finalize the output file and close the
    # preview window (previously this cleanup sat after `break` and never ran).
    cap.release()
    VideoFileOutput.release()
    cv2.destroyAllWindows()

四、视频和实时目标检测(摄像头)

视频检测和实时目标检测其实是一个套路,所以在此整合到一个文件中,代码如下:

import os
import cv2
import time
import argparse
import multiprocessing
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

class TOD(object):
    """Object detector for video files and live camera streams.

    A frozen TensorFlow inference graph and its label map are loaded once at
    construction time; ``detect_videos`` then reads frames from an OpenCV
    capture, annotates sampled frames, shows them and writes them to a file.
    """

    def __init__(self):
        """Load the frozen graph and the label map from their hard-coded paths."""
        # Path to the frozen detection graph (.pb): the model used for detection.
        self.PATH_TO_CKPT = 'forzon_inference_graph7045/frozen_inference_graph.pb'
        # Path to the .pbtxt label map that provides the label for each box.
        # (For the COCO model use os.path.join('data', 'mscoco_label_map.pbtxt').)
        self.PATH_TO_LABELS = 'label_map.pbtxt'
        # Number of classes.
        self.NUM_CLASSES = 3

        self.detection_graph = self._load_model()
        self.category_index = self._load_label_map()

    def _load_model(self):
        """Read the frozen ``.pb`` file and import it into a fresh ``tf.Graph``.

        Returns:
            The ``tf.Graph`` holding the imported detection model.
        """
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(self.PATH_TO_CKPT, 'rb') as fid:
                od_graph_def.ParseFromString(fid.read())
            tf.import_graph_def(od_graph_def, name='')
        return detection_graph

    def _load_label_map(self):
        """Build the category index used to label detections.

        Returns:
            The object returned by ``label_map_util.create_category_index``.
        """
        label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True)
        return label_map_util.create_category_index(categories)

    def detect_videos(self, cap, output_path="b.avi", frame_interval=30):
        """Annotate sampled frames of ``cap``, display them and save them.

        Only every ``frame_interval``-th frame is run through the model; just
        those frames are shown and written to ``output_path``.  Pressing ``q``
        in the preview window stops processing.  The capture, the writer and
        all OpenCV windows are released when the method exits, including on
        error.

        Args:
            cap: An opened ``cv2.VideoCapture`` (video file or camera).
            output_path: File the annotated frames are written to.
            frame_interval: Process one frame out of this many (must be >= 1).

        Raises:
            ValueError: If ``frame_interval`` is smaller than 1.
        """
        if frame_interval < 1:
            raise ValueError("frame_interval must be >= 1")

        codec = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')  # four character code
        framerate = 30
        # Use the frame size reported by the capture: VideoWriter expects every
        # frame to have exactly the size it was opened with.  Fall back to
        # 640x480 when the capture does not report a size.
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        resolution = (width, height) if width > 0 and height > 0 else (640, 480)
        writer = cv2.VideoWriter(output_path, codec, framerate, resolution)

        graph = self.detection_graph
        count = 0
        try:
            with graph.as_default():
                with tf.Session(graph=graph) as sess:
                    # Look the tensors up once instead of on every frame.
                    image_tensor = graph.get_tensor_by_name('image_tensor:0')
                    fetches = [
                        graph.get_tensor_by_name('detection_boxes:0'),
                        graph.get_tensor_by_name('detection_scores:0'),
                        graph.get_tensor_by_name('detection_classes:0'),
                        graph.get_tensor_by_name('num_detections:0'),
                    ]
                    while True:
                        # Read the video frame by frame.
                        ret, frame = cap.read()
                        if not ret:
                            break
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break
                        count += 1
                        if count % frame_interval != 0:
                            continue

                        start = time.time()
                        # NOTE(review): OpenCV frames are BGR; graphs exported
                        # by the TF Object Detection API are normally fed RGB
                        # -- confirm whether a conversion is needed.
                        image_np_expanded = np.expand_dims(frame, axis=0)
                        # Actual detection.
                        boxes, scores, classes, _ = sess.run(
                            fetches, feed_dict={image_tensor: image_np_expanded})
                        vis_util.visualize_boxes_and_labels_on_image_array(
                            frame,
                            np.squeeze(boxes),
                            np.squeeze(classes).astype(np.int32),
                            np.squeeze(scores),
                            self.category_index,
                            use_normalized_coordinates=True,
                            line_thickness=8)
                        # Guard against a zero interval on coarse clocks.
                        elapsed = max(time.time() - start, 1e-9)

                        # putText arguments: image / text / anchor coordinate /
                        # font / font scale / colour / thickness.  Other fonts
                        # are available as cv2.FONT_HERSHEY_* (PLAIN, DUPLEX,
                        # COMPLEX, TRIPLEX, COMPLEX_SMALL, SCRIPT_SIMPLEX,
                        # SCRIPT_COMPLEX) and cv2.FONT_ITALIC.
                        # OpenCV stores colour images in BGR order and the
                        # colour tuple is B, G, R too, so (255, 0, 0) is blue,
                        # not red.
                        cv2.putText(frame, "FPS {0}".format(str(1.0 / elapsed)), (30, 30),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
                        writer.write(frame)
                        cv2.imshow("capture", frame)
                        cv2.waitKey(1)
        finally:
            # Release the capture, finalize the output file and free windows.
            cap.release()
            writer.release()
            cv2.destroyAllWindows()
if __name__ == '__main__':
    # VideoCapture opens the default camera when given 0 and a video file when
    # given a path.  Use cv2.VideoCapture(0) here for live camera detection.
    video_source = cv2.VideoCapture("a.MP4")

    detector = TOD()
    detector.detect_videos(video_source)