Deep learning is now widely used across many industries. In practice, a model with good generalization can usually be trained from a large amount of data, but deploying that model remotely in a quick and convenient way has become a concern for many companies. Commonly used tools for remote deployment of deep learning models include tensorflow/serving, ONNX, OpenVINO, and paddlepaddle/serving. This article walks through, in detail, how to use Docker and tensorflow/serving to deploy a deep learning model remotely.

1. Environment Setup

1.1. OS: Ubuntu 16.04 / CentOS 7.2
1.2. NVIDIA driver: must support CUDA 10.0 + cuDNN 7.6
1.3. docker + nvidia-docker2: installation guide [link]
1.4. TensorFlow 1.14.0 + Keras 2.3 (a quick version check is sketched below)
1.5. A trained deep learning model: a Keras .h5 model, or a TensorFlow .ckpt or .pb model
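
A quick way to confirm the environment matches the versions above is to print them from Python. This is only a minimal verification sketch, assuming TensorFlow and Keras are installed as listed in 1.4:

import tensorflow as tf
import keras

print('TensorFlow:', tf.__version__)            # expected: 1.14.0
print('Keras:', keras.__version__)               # expected: 2.3.x
print('GPU available:', tf.test.is_gpu_available())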

2. Model Conversion

2.1. Converting a Keras .h5 model
from keras import backend as K
import tensorflow as tf
from tensorflow.python import saved_model
from tensorflow.python.saved_model.signature_def_utils_impl import (
    build_signature_def, predict_signature_def)
from keras_retinanet import models
import shutil
import os

# Path where the tensorflow/serving SavedModel will be written
export_path = 'keras-retinanet-master/snapshots/fire_models'
# Build the Keras model and load the trained weights
num_classes = 1
model = models.convert_model(
    model=models.backbone(backbone_name='resnet50').retinanet(num_classes=num_classes),
    nms=True,
    class_specific_filter=True,
    anchor_params=None
)
model.load_weights('keras-retinanet-master/snapshots/resnet50_csv_11.h5')
# Print the model's input and output tensor names
print('Output layers', [o.name[:-2] for o in model.outputs])
print('Input layer', model.inputs[0].name[:-2])
# Create a SavedModelBuilder (remove any previous export first)
if os.path.isdir(export_path):
    shutil.rmtree(export_path)
builder = saved_model.builder.SavedModelBuilder(export_path)

# Define the model's inputs and outputs, mapping the serving interface to tensor signatures
signature = predict_signature_def(
    inputs={'input': model.input},     # input dict: key is a name you choose, value is the .h5 model's input tensor
    outputs={
        'loc_box': model.outputs[0],   # output dict: keys are names you choose, values are the .h5 model's output tensors
        'fire_pre': model.outputs[1],
        'fire_class': model.outputs[2]})
sess = K.get_session()                 # get the Keras session

# Map a signature name to the signature definition
builder.add_meta_graph_and_variables(sess=sess,
                                     tags=[saved_model.tag_constants.SERVING],
                                     signature_def_map={'predict': signature})
# Save the SavedModel
builder.save()
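Before moving on, it is worth verifying the export by loading the SavedModel back and printing its signature. This is only a verification sketch using the TF 1.x loader API and the export_path defined above:

import tensorflow as tf

with tf.Session(graph=tf.Graph()) as sess:
    meta_graph = tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING], export_path)
    # Print the exported 'predict' signature (input/output names, dtypes and shapes)
    print(meta_graph.signature_def['predict'])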
2.2. Converting a frozen TensorFlow .pb model to a tf_serving SavedModel
import tensorflow as tf
import os

def export_model(PATH_TO_PB):
    tf.reset_default_graph()
    output_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_PB, 'rb') as fid:
        # Read the *.pb file into serialized_graph
        serialized_graph = fid.read()
        # Parse serialized_graph into the GraphDef
        output_graph_def.ParseFromString(serialized_graph)
        # Import output_graph_def into the current default graph (load the model)
        tf.import_graph_def(output_graph_def, name='')
    print('Model loaded')
    # Use the default graph, which now contains the model
    detection_graph = tf.get_default_graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config, graph=detection_graph)

    # Input tensor of the graph
    image_tensor = detection_graph.get_tensor_by_name('input_1:0')
    # Output tensors of the graph
    boxes = detection_graph.get_tensor_by_name('filtered_detections/map/TensorArrayStack/TensorArrayGatherV3:0')
    scores = detection_graph.get_tensor_by_name('filtered_detections/map/TensorArrayStack_1/TensorArrayGatherV3:0')
    classes = detection_graph.get_tensor_by_name('filtered_detections/map/TensorArrayStack_2/TensorArrayGatherV3:0')
    return sess, image_tensor, boxes, scores, classes
def main(export_model_dir):
    sess, image_tensor, boxes, scores, classes = export_model(PATH_TO_PB)
    # Create a SavedModelBuilder
    export_path_base = export_model_dir
    export_path = os.path.join(
        tf.compat.as_bytes(export_path_base))
    builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(export_path)
    print('step2 => Export path(%s) ready to export trained model' % export_path)

    # Map the tensorflow/serving inputs and outputs to tensor info
    inputs = {'input': tf.compat.v1.saved_model.utils.build_tensor_info(image_tensor)}
    outputs = {'loc_box': tf.compat.v1.saved_model.utils.build_tensor_info(boxes),
               'fire_pre': tf.compat.v1.saved_model.utils.build_tensor_info(scores),
               'fire_class': tf.compat.v1.saved_model.utils.build_tensor_info(classes)}
    prediction_signature = (tf.compat.v1.saved_model.signature_def_utils.build_signature_def(
            inputs, outputs, method_name=tf.saved_model.PREDICT_METHOD_NAME))
    print('step3 => prediction_signature created successfully')

    # Map the signature name to the signature definition
    builder.add_meta_graph_and_variables(sess, [tf.saved_model.SERVING],
                                         signature_def_map={'predict': prediction_signature})
    print('step4 => builder successfully add meta graph and variables\nNext is to export model...')
    builder.save()
    print('Done exporting!')

if __name__ == '__main__':
    PATH_TO_PB = 'snapshots/model.pb'
    export_dir = 'snapshots/my_models'
    main(export_dir)
2.3. Converting a TensorFlow .ckpt checkpoint
import tensorflow as tf
import os

def export_model(PATH_TO_CKPT):
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore the variables from the checkpoint
            saver = tf.train.import_meta_graph("{}.meta".format(PATH_TO_CKPT))
            saver.restore(sess, PATH_TO_CKPT)

            # Input tensor of the graph
            image_tensor = graph.get_tensor_by_name('input_1:0')
            # Output tensors of the graph
            boxes = graph.get_tensor_by_name('filtered_detections/map/TensorArrayStack/TensorArrayGatherV3:0')
            scores = graph.get_tensor_by_name('filtered_detections/map/TensorArrayStack_1/TensorArrayGatherV3:0')
            classes = graph.get_tensor_by_name('filtered_detections/map/TensorArrayStack_2/TensorArrayGatherV3:0')
            return sess, image_tensor, boxes, scores, classes
    
def main(export_model_dir):
    sess, image_tensor, boxes, scores, classes = export_model(PATH_TO_CKPT)
    # Create a SavedModelBuilder
    export_path_base = export_model_dir
    export_path = os.path.join(
        tf.compat.as_bytes(export_path_base))
    builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(export_path)
    print('step2 => Export path(%s) ready to export trained model' % export_path)

    # Define the model's inputs and outputs, mapping the serving interface to tensor signatures
    inputs = {'input': tf.compat.v1.saved_model.utils.build_tensor_info(image_tensor)}
    outputs = {'loc_box': tf.compat.v1.saved_model.utils.build_tensor_info(boxes),
               'fire_pre': tf.compat.v1.saved_model.utils.build_tensor_info(scores),
               'fire_class': tf.compat.v1.saved_model.utils.build_tensor_info(classes)}

    prediction_signature = (tf.compat.v1.saved_model.signature_def_utils.build_signature_def(
            inputs, outputs, method_name=tf.saved_model.PREDICT_METHOD_NAME))
    print('step3 => prediction_signature created successfully')
    # Map the signature name to the signature definition
    builder.add_meta_graph_and_variables(sess, [tf.saved_model.SERVING],
                                         signature_def_map={'predict': prediction_signature})
    print('step4 => builder successfully add meta graph and variables\nNext is to export model...')
    builder.save()
    print('Done exporting!')

if __name__ == '__main__':
    PATH_TO_CKPT = 'snapshots/model.ckpt'
    export_dir = 'snapshots/my_models'
    main(export_dir)

3. Remote Deployment

3.1. Pull the image
     $: docker pull tensorflow/serving:latest        # CPU version
     $: docker pull tensorflow/serving:latest-gpu    # GPU version; the NVIDIA driver must support CUDA 10.0 or later
3.2. Model directory layout (-- marks a directory, - marks a file); a small copy helper is sketched after the listing:
     -- mul_models
          -- fire-model
               -- 1               # the version directory name must be an integer
                  -  saved_model.pb
                  -- variables
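As referenced above, here is a minimal sketch of staging an exported SavedModel into a numbered version directory; the source and destination paths are assumptions taken from the examples in step 2:

import os
import shutil

src = 'keras-retinanet-master/snapshots/fire_models'   # SavedModel exported in step 2.1
dst = 'mul_models/fire-model/1'                        # version directory must be an integer
if not os.path.isdir(dst):
    shutil.copytree(src, dst)                          # copies saved_model.pb and variables/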
3.3. Start the service
     $: docker run -d --runtime=nvidia --rm -p 8500:8500 -p 8501:8501 --mount type=bind,source=${model_path},target=/models/fire-model -e MODEL_NAME=fire-model -e NVIDIA_VISIBLE_DEVICES=0 -t tensorflow/serving:latest-gpu
     # Notes:
      (1) The tensorflow/serving image listens on ports 8500 and 8501 by default: 8500 serves gRPC requests and 8501 serves HTTP/REST requests (a quick HTTP status check is sketched after these notes);
      (2) There must be no spaces between the type, source and target options of --mount, otherwise the command fails;
      (3) To update the model, simply drop the new model version into the fire-model folder; there is no need to stop the service.
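Once the container is running, the HTTP endpoint on port 8501 can be used to confirm the model is loaded. A minimal sketch, assuming the service runs locally with MODEL_NAME=fire-model as above:

import requests

# GET /v1/models/<model_name> returns the version status of the served model
status = requests.get('http://localhost:8501/v1/models/fire-model')
print(status.json())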
         
         
3.4. Option reference
     --mount:    perform a bind mount
     source:     host path of the exported tensorflow/serving model to deploy
     target:     mount point inside the container; it must live under /models/, otherwise the server reports that the model cannot be found
     -t:         allocate a pseudo-terminal for the container
     -d:         run the container in the background
     -p:         map a host port to a container port
     -e:         set an environment variable
     -v:         docker data volume   # optional
     --name:     name the container, which is more convenient later than using the container_id   # optional
     --per_process_gpu_memory_fraction: fraction of GPU memory the model server claims at startup (a tensorflow_model_server flag, appended after the image name)

4. Deploying Multiple Models Simultaneously

Step 3 deploys a single deep learning model remotely. In practice, however, we often need to serve several models at once. This is straightforward: the tensorflow/serving image can deploy multiple models at the same time through a configuration file.
4.1. Model directory layout (-- marks a directory, - marks a file):
    -- mul_models
         - model.config
         -- my_models1
                -- 1
                   -  saved_model.pb
                   -- variables
                -- 2
                   -  saved_model.pb
                   -- variables
         -- my_models2
                -- 1
                   -  saved_model.pb
                   -- variables
         -- my_models3
                -- 3
                   -  saved_model.pb
                   -- variables

4.2. Write the model.config file:
	model_config_list:{
	    config:{
	      name:"my_models1",         # model name; using the directory name is the usual choice
	      base_path:"/models/mul_models/my_models1",  # path of the model inside the container
	      model_platform:"tensorflow",
	      model_version_policy:{      # serve every version found under this directory
	            all:{}
	      }
	    },
	    config:{
	      name:"my_models2",
	      base_path:"/models/mul_models/my_models2",
	      model_platform:"tensorflow",
	      model_version_policy:{        # serve only the latest version(s) under this directory
	        latest:{
	             num_versions:1
	        }
	      }
	    },
	    config:{
	      name:"my_models3",
	      base_path:"/models/mul_models/my_models3",
	      model_platform:"tensorflow",
	      model_version_policy: {      # serve the specified version(s) under this directory
	         specific: {
	             versions: 1
	         }
	      }
	    }
	 }
4.3. Start the service (the target path must match the base_path entries in model.config):
     $: docker run --runtime=nvidia --rm -p 8500:8500 --mount type=bind,source=${model_path},target=/models/mul_models -e NVIDIA_VISIBLE_DEVICES=0 -t tensorflow/serving:latest-gpu --model_config_file=/models/mul_models/model.config

5. Testing

The tensorflow/serving image exposes two interfaces by default, gRPC and HTTP; in terms of response latency, gRPC is generally the faster of the two.

5.1 gRPC
5.1.1 Install tensorflow-serving-api
$: pip install tensorflow-serving-api    # note: the installed version must match your TensorFlow version
5.1.2 Test code
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc
import grpc                # if the grpc module cannot be found, install it: pip install grpcio
import cv2
import numpy as np
from time import time

channel = grpc.insecure_channel("localhost:8500")
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
request1 = predict_pb2.PredictRequest()
request1.model_spec.name = "fire-model"    # must match the MODEL_NAME (or config name) used when starting the service
#request1.model_spec.signature_name = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
request1.model_spec.signature_name = "predict"
def preprocess(image):
    '''
    Pre-processing for the model input, e.g. resize/normalize and expand to a batch.
    '''

def postprocess(boxes, scores, labels):
    '''
    Post-processing for the model output, e.g. filtering by score and drawing boxes.
    '''
    
def main():
    img_path = "path/to/image"
    image = cv2.imread(img_path)
    image_np_expanded = preprocess(image)   # preprocess() is assumed to return a batched float array
    a1 = time()
    request1.inputs["input"].ParseFromString(
        tf.contrib.util.make_tensor_proto(image_np_expanded, dtype=tf.float32).SerializeToString())
    response = stub.Predict(request1)
    a2 = time()
    print('Detection response time: {}'.format(a2 - a1))
    results = {}
    for key in response.outputs:
        tensor_proto = response.outputs[key]
        results[key] = tf.contrib.util.make_ndarray(tensor_proto)
    boxes = results["loc_box"]
    scores = results["fire_pre"]
    labels = results["fire_class"]
    postprocess(boxes, scores, labels)

if __name__ == '__main__': 
    main()
5.2 HTTP
import requests
from time import time
import json
import numpy as np
import cv2
def preprocess(image):
  '''
  Pre-processing for the model input, e.g. resize/normalize and expand to a batch.
  '''
def postprocess(boxes, scores, labels):
  '''
  Post-processing for the model output, e.g. filtering by score and drawing boxes.
  '''
def main():
  url = 'http://localhost:8501/v1/models/my_models:predict'       # set the IP and port; the model name must match the served model
  img_path = "path/to/image"
  image = cv2.imread(img_path)
  image_np_expanded = preprocess(image)     # preprocess() is assumed to return a batched float array
  a1 = time()
  predict_request = {"signature_name": "predict",                 # the signature name exported in step 2
                     "inputs": image_np_expanded.tolist()}        # "inputs" is the REST API field; for a single-input signature the tensor can be passed directly
  r = requests.post(url, json=predict_request)                    # send the request
  a2 = time()
  print('Detection response time: {}'.format(a2 - a1))
  prediction = json.loads(r.content.decode("utf-8"))['outputs']   # read the response
  boxes = np.array(prediction.get("loc_box"))
  scores = np.array(prediction.get('fire_pre'))
  labels = np.array(prediction.get("fire_class"))
  postprocess(boxes, scores, labels)

if __name__ == '__main__':
    main()

With that, online deployment of a deep learning model with tensorflow/serving is complete.