1. Build ncnn

# prepare part
$ cd ~/Github/
$ git clone https://github.com/Tencent/ncnn.git
$ cd ncnn
$ git submodule update --init
$ sudo apt install build-essential git cmake libprotobuf-dev protobuf-compiler libvulkan-dev vulkan-utils libopencv-dev

# build part
$ mkdir build && cd build
$ cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local/ ..
$ make -j$(nproc)
$ sudo make install

2. Build pnnx

Ubuntu 18.04.6 LTS
torch                     1.8.0+cpu
torchaudio                0.8.0
torchvision               0.9.0+cpu
cmake                     3.25.0
ninja                     1.11.1
gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
$ cd ~/Github/ncnn/
$ cd tools/pnnx/
$ pip install cmake==3.25.0 ninja==1.11.1 -i https://pypi.douban.com/simple
$ cmake -Bbuild -GNinja
$ cmake --build build --config Release
$ ./build/src/pnnx
Usage: pnnx [model.pt] [(key=value)...]
  pnnxparam=model.pnnx.param
  pnnxbin=model.pnnx.bin
  pnnxpy=model_pnnx.py
  pnnxonnx=model.pnnx.onnx
  ncnnparam=model.ncnn.param
  ncnnbin=model.ncnn.bin
  ncnnpy=model_ncnn.py
  fp16=1
  optlevel=2
  device=cpu/gpu
  inputshape=[1,3,224,224],...
  inputshape2=[1,3,320,320],...
  customop=/home/nihui/.cache/torch_extensions/fused/fused.so,...
  moduleop=models.common.Focus,models.yolo.Detect,...
Sample usage: pnnx mobilenet_v2.pt inputshape=[1,3,224,224]
              pnnx yolov5s.pt inputshape=[1,3,640,640]f32 inputshape2=[1,3,320,320]f32 device=gpu moduleop=models.common.Focus,models.yolo.Detect

3. Convert yolov8 pt -> ONNX

3.0 Prepare yolov8s-seg.pt

# prepare part
$ cd /home/tianzx/AI/pre_weights/
$ mkdir -p test/yolov8/ && cd test/yolov8/
$ wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt
$ mkdir normal tensorrt
$ cp yolov8s-seg.pt normal/
$ cp yolov8s-seg.pt tensorrt/

3.1 Convert using ultralytics

$ cd ~/Github/
$ git clone https://github.com/ultralytics/ultralytics.git
$ cd ultralytics
$ pip install onnx==1.12.0 onnxruntime==1.12.0 onnx-simplifier==0.4.8 onnxsim==0.4.13 -i https://pypi.douban.com/simple
$ pip install ultralytics -i https://pypi.douban.com/simple

3.1.1 segment model

Three `forward` methods in ultralytics/ultralytics/nn/modules.py need to be modified for the segment model.

  1. class C2f(nn.Module)
def forward(self, x):
    """Forward pass of C2f using a channel slice instead of ``split``.

    Upstream implementation that this variant replaces:
        y = list(self.cv1(x).split((self.c, self.c), 1))
        y.extend(m(y[-1]) for m in self.m)
        return self.cv2(torch.cat(y, 1))
    Variant taken from https://github.com/FeiGeChuanShu/ncnn-android-yolov8
    """
    stem = self.cv1(x)
    # Channels from index self.c onward seed the chain of self.m modules;
    # each module consumes the output of the previous one.
    tail = stem[:, self.c:, ...]
    pieces = [stem]
    for block in self.m:
        tail = block(tail)
        pieces.append(tail)
    # The raw slice itself is not concatenated, only the full cv1 output
    # followed by every module output.
    return self.cv2(torch.cat(pieces, 1))
  2. class Detect(nn.Module)
def forward(self, x):
    """Detect head forward that returns raw, undecoded predictions.

    ``x`` is a list of per-level feature maps and is updated in place.
    In training mode the per-level list is returned unchanged in structure.
    Otherwise every level is flattened to (batch, self.no, -1) and all levels
    are concatenated along the last axis.

    Upstream decoding that this variant drops:
        box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1)
        dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
        y = torch.cat((dbox, cls.sigmoid()), 1)
        return y if self.export else (y, x)
    Variant taken from https://github.com/FeiGeChuanShu/ncnn-android-yolov8
    """
    shape = x[0].shape  # BCHW
    for idx in range(self.nl):
        branch_a = self.cv2[idx](x[idx])
        branch_b = self.cv3[idx](x[idx])
        x[idx] = torch.cat((branch_a, branch_b), 1)

    if self.training:
        return x

    if self.dynamic or self.shape != shape:
        # Refresh the cached anchors/strides whenever the input shape changes.
        anchors, strides = make_anchors(x, self.stride, 0.5)
        self.anchors = anchors.transpose(0, 1)
        self.strides = strides.transpose(0, 1)
        self.shape = shape

    flattened = [level.view(shape[0], self.no, -1) for level in x]
    return torch.cat(flattened, 2)
  3. class Segment(Detect)
def forward(self, x):
    """Segment head forward producing detections plus flattened mask protos.

    Returns:
        training:   (x, mc, p)
        export:     (cat([x, mc], 1) permuted to put channels last,
                     p reshaped to (bs, self.nm, -1))
        otherwise:  (cat([x[0], mc], 1), (x[1], mc, p))

    Upstream return statement that this variant replaces:
        return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
    Variant taken from https://github.com/FeiGeChuanShu/ncnn-android-yolov8
    """
    p = self.proto(x[0])  # mask protos
    bs = p.shape[0]  # batch size

    # Mask coefficients must be computed before self.detect runs on x.
    coeff_levels = []
    for i in range(self.nl):
        coeff_levels.append(self.cv4[i](x[i]).view(bs, self.nm, -1))
    mc = torch.cat(coeff_levels, 2)  # mask coefficients

    x = self.detect(self, x)
    if self.training:
        return x, mc, p
    if self.export:
        return torch.cat([x, mc], 1).permute(0, 2, 1), p.view(bs, self.nm, -1)
    # NOTE(review): this branch indexes x[0]/x[1] as if self.detect returned a
    # (y, x) pair; confirm it is still valid when the detect head returns a
    # single tensor.
    return torch.cat([x[0], mc], 1), (x[1], mc, p)

3.1.2 Detect model

Two `forward` methods in ultralytics/ultralytics/nn/modules.py need to be modified for the detect model.

  1. class C2f(nn.Module)
def forward(self, x):
    """Forward pass of C2f using a channel slice instead of ``split``.

    Upstream implementation that this variant replaces:
        y = list(self.cv1(x).split((self.c, self.c), 1))
        y.extend(m(y[-1]) for m in self.m)
        return self.cv2(torch.cat(y, 1))
    Variant taken from https://github.com/FeiGeChuanShu/ncnn-android-yolov8
    """
    stem = self.cv1(x)
    # Channels from index self.c onward seed the chain of self.m modules;
    # each module consumes the output of the previous one.
    tail = stem[:, self.c:, ...]
    pieces = [stem]
    for block in self.m:
        tail = block(tail)
        pieces.append(tail)
    # The raw slice itself is not concatenated, only the full cv1 output
    # followed by every module output.
    return self.cv2(torch.cat(pieces, 1))
  2. class Detect(nn.Module)
def forward(self, x):
    """Detect head forward that returns raw, undecoded predictions.

    ``x`` is a list of per-level feature maps and is updated in place.
    In training mode the per-level list is returned unchanged in structure.
    Otherwise every level is flattened to (batch, self.no, -1), all levels are
    concatenated along the last axis, and the last two axes are swapped.

    Upstream decoding that this variant drops:
        box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1)
        dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
        y = torch.cat((dbox, cls.sigmoid()), 1)
        return y if self.export else (y, x)
    Variant taken from https://github.com/FeiGeChuanShu/ncnn-android-yolov8
    """
    shape = x[0].shape  # BCHW
    for idx in range(self.nl):
        branch_a = self.cv2[idx](x[idx])
        branch_b = self.cv3[idx](x[idx])
        x[idx] = torch.cat((branch_a, branch_b), 1)

    if self.training:
        return x

    if self.dynamic or self.shape != shape:
        # Refresh the cached anchors/strides whenever the input shape changes.
        anchors, strides = make_anchors(x, self.stride, 0.5)
        self.anchors = anchors.transpose(0, 1)
        self.strides = strides.transpose(0, 1)
        self.shape = shape

    flattened = [level.view(shape[0], self.no, -1) for level in x]
    stacked = torch.cat(flattened, 2)
    return stacked.permute(0, 2, 1)
  • export.py
from ultralytics import YOLO

# Pretrained segmentation checkpoint to convert (the copy kept under normal/).
WEIGHTS_PATH = "/home/tianzx/AI/pre_weights/test/yolov8/normal/yolov8s-seg.pt"

# Load the pretrained model (recommended for training).
# Alternative, build a new model from scratch: model = YOLO("yolov8n.yaml")
model = YOLO(WEIGHTS_PATH)

# Other uses of the loaded model, not needed for the conversion:
#   results = model.train(data="coco128.yaml", epochs=3)        # train the model
#   results = model.val()                                        # evaluate on the validation set
#   results = model("https://ultralytics.com/images/bus.jpg")    # predict on an image

# Export the model to ONNX format with opset 12 and simplify=True.
# Alternative: success = model.export(format="torchscript")
success = model.export(format="onnx", opset=12, simplify=True)
  • convert yolov8s-seg.pt to yolov8s-seg.onnx
$ cd ~/Github/ultralytics
$ python export.py
$ ls /home/tianzx/AI/pre_weights/test/yolov8/normal/
yolov8s-seg.pt  yolov8s-seg.onnx

3.2 Convert using YOLOv8-TensorRT

$ cd ~/Github/
$ git clone https://github.com/triple-Mu/YOLOv8-TensorRT
$ cd YOLOv8-TensorRT
# [optional]
$ pip install tensorrt -i https://pypi.douban.com/simple
$ python export_seg.py --weights /home/tianzx/AI/pre_weights/test/yolov8/tensorrt/yolov8s-seg.pt --opset 12 --sim
$ ls /home/tianzx/AI/pre_weights/test/yolov8/tensorrt/
yolov8s-seg.pt  yolov8s-seg.onnx

4. Convert onnx to ncnn

# normal
$ cd /home/tianzx/AI/pre_weights/test/yolov8/normal/
$ onnx2ncnn yolov8s-seg.onnx yolov8s-seg.param yolov8s-seg.bin
$ ls -hl
total 114M
-rw-rw-r-- 1 tianzx tianzx 46M 2月   2 15:01 yolov8s-seg.bin
-rw-rw-r-- 1 tianzx tianzx 20K 2月   2 15:01 yolov8s-seg.param
-rw-rw-r-- 1 tianzx tianzx 46M 2月   2 09:51 yolov8s-seg.onnx
-rw-rw-r-- 1 tianzx tianzx 23M 2月   2 14:50 yolov8s-seg.pt

# tensorrt
$ cd /home/tianzx/AI/pre_weights/test/yolov8/tensorrt/
$ onnx2ncnn yolov8s-seg.onnx
ArgMax not supported yet!
  # axis=-1
  # keepdims=1
$ ls -hl
total 114M
-rw-rw-r-- 1 tianzx tianzx 46M 2月   2 15:02 ncnn.bin
-rw-rw-r-- 1 tianzx tianzx 20K 2月   2 15:02 ncnn.param
-rw-rw-r-- 1 tianzx tianzx 46M 2月   2 09:47 yolov8s-seg.onnx
-rw-rw-r-- 1 tianzx tianzx 23M 2月   2 15:03 yolov8s-seg.pt

5. Test ncnn model

$ git clone https://github.com/FeiGeChuanShu/ncnn-android-yolov8
$ cd ncnn-android-yolov8/ncnn-yolov8s-seg

5.1 modify yolov8-seg.cpp

(1) change output name in detect_yolov8 function

    ncnn::Mat out;
    ex.extract("output0", out);

    ncnn::Mat mask_proto;
    ex.extract("output1", mask_proto);

(2) save the rendered image as result.jpg in the draw_objects function

    cv::imshow("image", image);
    cv::imwrite("result.jpg", image);
    cv::waitKey(0);

5.2 add CMakeLists.txt

# Build script for the desktop ncnn-yolov8s-seg demo.
# Requires OpenCV, OpenMP and an installed ncnn (for example the result of
# `sudo make install` into /usr/local), whose CMake package config provides
# the imported `ncnn` target together with its include directories.
cmake_minimum_required(VERSION 3.10...3.25)
project(ncnn-yolov8s-seg)

# Default to an optimized build, but only when the user did not choose a build
# type and the generator is single-config; never override an explicit choice.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
endif()

# REQUIRED makes CMake stop with an error when a package is missing, so no
# manual "not found" branches are needed.
find_package(OpenCV REQUIRED)
message(STATUS "OpenCV_LIBS: ${OpenCV_LIBS}")
message(STATUS "OpenCV_INCLUDE_DIRS: ${OpenCV_INCLUDE_DIRS}")

find_package(OpenMP REQUIRED)

# Locates the installed ncnn package instead of hardcoding /usr/local paths.
# If ncnn lives in a non-default prefix, pass -DCMAKE_PREFIX_PATH=<prefix>.
find_package(ncnn REQUIRED)

# Source files of the demo (headers are listed so IDEs show them).
file(GLOB SRC
    "${CMAKE_CURRENT_SOURCE_DIR}/*.h"
    "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
)

add_executable(ncnn-yolov8s-seg ${SRC})

# Target-scoped settings replace the former global CMAKE_CXX_FLAGS edits.
target_compile_features(ncnn-yolov8s-seg PRIVATE cxx_std_11)
target_compile_options(ncnn-yolov8s-seg PRIVATE -Wall)
set_target_properties(ncnn-yolov8s-seg PROPERTIES POSITION_INDEPENDENT_CODE ON)

target_include_directories(ncnn-yolov8s-seg PRIVATE ${OpenCV_INCLUDE_DIRS})
target_link_libraries(ncnn-yolov8s-seg
    PRIVATE
        ncnn
        OpenMP::OpenMP_CXX
        ${OpenCV_LIBS}
)

5.3 Build ncnn-yolov8s-seg

$ cd ncnn-android-yolov8/ncnn-yolov8s-seg
$ mkdir build && cd build
$ cmake ..
$ make -j$(nproc)
$ cp ncnn-yolov8s-seg ../
$ ./ncnn-yolov8s-seg /home/tianzx/Pictures/coco_sample.png 
15 = 0.92688 at 12.03 52.23 305.47 x 420.98
15 = 0.89253 at 344.51 25.41 294.49 x 346.10
65 = 0.84357 at 40.06 73.78 135.51 x 44.37
65 = 0.69806 at 334.26 77.02 35.89 x 111.01
57 = 0.68551 at 1.36 0.81 637.40 x 478.19
  • coco_sample.png coco_sample.png

  • result.jpg result.jpg

6. References