1. Build ncnn
# prepare part
$ cd ~/Github/
$ git clone https://github.com/Tencent/ncnn.git
$ cd ncnn
$ git submodule update --init
$ sudo apt install build-essential git cmake libprotobuf-dev protobuf-compiler libvulkan-dev vulkan-utils libopencv-dev
# build part
$ mkdir build && cd build
$ cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local/ ..
$ make -j$(nproc)
$ sudo make install
2. Build pnnx
Environment:
- Ubuntu 18.04.6 LTS
- torch 1.8.0+cpu
- torchaudio 0.8.0
- torchvision 0.9.0+cpu
- cmake 3.25.0
- ninja 1.11.1
- gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
$ cd ~/Github/ncnn/
$ cd tools/pnnx/
$ pip install cmake==3.25.0 ninja==1.11.1 -i https://pypi.douban.com/simple
$ cmake -Bbuild -GNinja
$ cmake --build build --config Release
$ ./build/src/pnnx
Usage: pnnx [model.pt] [(key=value)...]
pnnxparam=model.pnnx.param
pnnxbin=model.pnnx.bin
pnnxpy=model_pnnx.py
pnnxonnx=model.pnnx.onnx
ncnnparam=model.ncnn.param
ncnnbin=model.ncnn.bin
ncnnpy=model_ncnn.py
fp16=1
optlevel=2
device=cpu/gpu
inputshape=[1,3,224,224],...
inputshape2=[1,3,320,320],...
customop=/home/nihui/.cache/torch_extensions/fused/fused.so,...
moduleop=models.common.Focus,models.yolo.Detect,...
Sample usage: pnnx mobilenet_v2.pt inputshape=[1,3,224,224]
pnnx yolov5s.pt inputshape=[1,3,640,640]f32 inputshape2=[1,3,320,320]f32 device=gpu moduleop=models.common.Focus,models.yolo.Detect
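pnnx consumes a TorchScript file, so an eager PyTorch model has to be traced first. A minimal sketch, assuming the torchvision mobilenet_v2 from the sample usage above:

import torch
import torchvision.models as models

net = models.mobilenet_v2(pretrained=True).eval()
x = torch.rand(1, 3, 224, 224)  # must match the inputshape passed to pnnx
mod = torch.jit.trace(net, x)   # trace to TorchScript
mod.save("mobilenet_v2.pt")
# then: ./build/src/pnnx mobilenet_v2.pt inputshape=[1,3,224,224]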
3. Convert YOLOv8 pt to ONNX
3.0 Prepare yolov8s-seg.pt
# prepare part
$ cd /home/tianzx/AI/pre_weights/
$ mkdir -p test/yolov8/
$ cd test/yolov8/
$ wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt
$ mkdir normal tensorrt
$ cp yolov8s-seg.pt normal/
$ cp yolov8s-seg.pt tensorrt/
3.1 Convert using ultralytics
$ cd ~/Github/
$ git clone https://github.com/ultralytics/ultralytics.git
$ cd ultralytics
$ pip install onnx==1.12.0 onnxruntime==1.12.0 onnx-simplifier==0.4.8 onnxsim==0.4.13 -i https://pypi.douban.com/simple
$ pip install ultralytics -i https://pypi.douban.com/simple
3.1.1 Segment model
For the segment model, modify the forward methods of three classes in ultralytics/ultralytics/nn/modules.py: C2f, Detect, and Segment.
class C2f(nn.Module):
    def forward(self, x):
        # y = list(self.cv1(x).split((self.c, self.c), 1))
        # y.extend(m(y[-1]) for m in self.m)
        # return self.cv2(torch.cat(y, 1))
        # !< https://github.com/FeiGeChuanShu/ncnn-android-yolov8
        x = self.cv1(x)
        x = [x, x[:, self.c:, ...]]
        x.extend(m(x[-1]) for m in self.m)
        x.pop(1)
        return self.cv2(torch.cat(x, 1))
class Detect(nn.Module):
    def forward(self, x):
        shape = x[0].shape  # BCHW
        for i in range(self.nl):
            x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
        if self.training:
            return x
        elif self.dynamic or self.shape != shape:
            self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
            self.shape = shape
        # box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1)
        # dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
        # y = torch.cat((dbox, cls.sigmoid()), 1)
        # return y if self.export else (y, x)
        # !< https://github.com/FeiGeChuanShu/ncnn-android-yolov8
        pred = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
        return pred
class Segment(Detect):
    def forward(self, x):
        p = self.proto(x[0])  # mask protos
        bs = p.shape[0]  # batch size
        mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2)  # mask coefficients
        x = self.detect(self, x)
        if self.training:
            return x, mc, p
        # return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
        # !< https://github.com/FeiGeChuanShu/ncnn-android-yolov8
        return (torch.cat([x, mc], 1).permute(0, 2, 1), p.view(bs, self.nm, -1)) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
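With this change the exported segment model emits two plain tensors instead of decoded boxes: the permuted detections cat([x, mc], 1) and the flattened mask protos p. A hedged numpy sketch of how the two fit together, assuming the default yolov8s-seg head (640x640 input, reg_max=16, nc=80, nm=32) and random data standing in for real network output:

import numpy as np

det = np.random.rand(1, 8400, 176).astype(np.float32)     # cat([x, mc], 1).permute(0, 2, 1)
protos = np.random.rand(1, 32, 25600).astype(np.float32)  # p.view(bs, nm, -1), 160*160 = 25600

coeffs = det[0, :, 144:]  # last nm=32 channels per anchor are the mask coefficients
masks = 1.0 / (1.0 + np.exp(-(coeffs @ protos[0])))  # sigmoid(mc @ protos) -> (8400, 25600)
masks = masks.reshape(-1, 160, 160)  # one low-resolution mask per candidate box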
3.1.2 Detect model
For the detect model, modify the forward methods of two classes in ultralytics/ultralytics/nn/modules.py: C2f and Detect. C2f is identical to the segment version; Detect additionally permutes the output.
class C2f(nn.Module):
    def forward(self, x):
        # y = list(self.cv1(x).split((self.c, self.c), 1))
        # y.extend(m(y[-1]) for m in self.m)
        # return self.cv2(torch.cat(y, 1))
        # !< https://github.com/FeiGeChuanShu/ncnn-android-yolov8
        x = self.cv1(x)
        x = [x, x[:, self.c:, ...]]
        x.extend(m(x[-1]) for m in self.m)
        x.pop(1)
        return self.cv2(torch.cat(x, 1))
class Detect(nn.Module):
    def forward(self, x):
        shape = x[0].shape  # BCHW
        for i in range(self.nl):
            x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
        if self.training:
            return x
        elif self.dynamic or self.shape != shape:
            self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
            self.shape = shape
        # box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1)
        # dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
        # y = torch.cat((dbox, cls.sigmoid()), 1)
        # return y if self.export else (y, x)
        # !< https://github.com/FeiGeChuanShu/ncnn-android-yolov8
        pred = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).permute(0, 2, 1)
        return pred
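Because the dist2bbox/DFL lines are commented out, the network now emits raw (4*reg_max + nc, anchors) logits and the box decoding has to happen in the inference code. A hedged numpy sketch of that decoding, with random data in place of the real output:

import numpy as np

reg_max, nc = 16, 80
pred = np.random.rand(144, 8400).astype(np.float32)  # (4*reg_max + nc, anchors)

box, cls = pred[:4 * reg_max], pred[4 * reg_max:]
p = box.reshape(4, reg_max, -1)                      # 4 sides x reg_max bins per anchor
p = np.exp(p - p.max(1, keepdims=True))
p /= p.sum(1, keepdims=True)                         # softmax over the bins (the DFL step)
bins = np.arange(reg_max, dtype=np.float32)
dist = (p * bins[None, :, None]).sum(1)              # expected l, t, r, b distances
scores = 1.0 / (1.0 + np.exp(-cls))                  # class confidences via sigmoid
# x1y1 = anchor_center - dist[:2], x2y2 = anchor_center + dist[2:], then scale by stride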
export.py:
from ultralytics import YOLO

# Load the model
#model = YOLO("yolov8n.yaml")  # build a new model from scratch
model = YOLO("/home/tianzx/AI/pre_weights/test/yolov8/normal/yolov8s-seg.pt")  # load a pretrained model (recommended for training)

# Use the model
#results = model.train(data="coco128.yaml", epochs=3)  # train the model
#results = model.val()  # evaluate model performance on the validation set
#results = model("https://ultralytics.com/images/bus.jpg")  # predict on an image
success = model.export(format="onnx", opset=12, simplify=True)  # export the model to ONNX format
# success = model.export(format="torchscript")
- Convert yolov8s-seg.pt to yolov8s-seg.onnx:
$ cd ~/Github/ultralytics
$ python export.py
$ ls /home/tianzx/AI/pre_weights/test/yolov8/normal/
yolov8s-seg.pt yolov8s-seg.onnx
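Before converting further, a quick sanity check with the onnxruntime installed above can confirm the exported graph has the two output blobs the ncnn demo expects (a sketch; the names output0/output1 match what section 5.1 extracts):

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("/home/tianzx/AI/pre_weights/test/yolov8/normal/yolov8s-seg.onnx")
x = np.random.rand(1, 3, 640, 640).astype(np.float32)
outs = sess.run(None, {sess.get_inputs()[0].name: x})
for meta, out in zip(sess.get_outputs(), outs):
    print(meta.name, out.shape)  # expect output0 (detections) and output1 (mask protos)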
3.2 Convert using YOLOv8-TensorRT
$ cd ~/Github/
$ git clone https://github.com/triple-Mu/YOLOv8-TensorRT
$ cd YOLOv8-TensorRT
# [optional]
$ pip install tensorrt -i https://pypi.douban.com/simple
$ python export_seg.py --weights /home/tianzx/AI/pre_weights/test/yolov8/tensorrt/yolov8s-seg.pt --opset 12 --sim
$ ls /home/tianzx/AI/pre_weights/test/yolov8/tensorrt/
yolov8s-seg.pt yolov8s-seg.onnx
4. Convert ONNX to ncnn
# normal
$ cd /home/tianzx/AI/pre_weights/test/yolov8/normal/
$ onnx2ncnn yolov8s-seg.onnx yolov8s-seg.param yolov8s-seg.bin
$ ls -hl
total 114M
-rw-rw-r-- 1 tianzx tianzx 46M Feb  2 15:01 yolov8s-seg.bin
-rw-rw-r-- 1 tianzx tianzx 20K Feb  2 15:01 yolov8s-seg.param
-rw-rw-r-- 1 tianzx tianzx 46M Feb  2 09:51 yolov8s-seg.onnx
-rw-rw-r-- 1 tianzx tianzx 23M Feb  2 14:50 yolov8s-seg.pt
# tensorrt
$ cd /home/tianzx/AI/pre_weights/test/yolov8/tensorrt/
$ onnx2ncnn yolov8s-seg.onnx
ArgMax not supported yet!
  # axis=-1
  # keepdims=1
With no output names given, onnx2ncnn writes the defaults ncnn.param / ncnn.bin. The warning means this export bakes post-processing into the graph, including an ArgMax (axis=-1, keepdims=1) that ncnn does not implement, so the converted files are incomplete (see the ArgMax issue in the references).
$ ls -hl
total 114M
-rw-rw-r-- 1 tianzx tianzx 46M Feb  2 15:02 ncnn.bin
-rw-rw-r-- 1 tianzx tianzx 20K Feb  2 15:02 ncnn.param
-rw-rw-r-- 1 tianzx tianzx 46M Feb  2 09:47 yolov8s-seg.onnx
-rw-rw-r-- 1 tianzx tianzx 23M Feb  2 15:03 yolov8s-seg.pt
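One way to catch this kind of failure before running onnx2ncnn is to list the op types in the graph; a small sketch with the onnx package (already installed in 3.1):

import onnx

model = onnx.load("yolov8s-seg.onnx")
print(sorted({node.op_type for node in model.graph.node}))
# the YOLOv8-TensorRT export contains ArgMax; the ultralytics export should not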
5. Test ncnn model
$ git clone https://github.com/FeiGeChuanShu/ncnn-android-yolov8
$ cd ncnn-android-yolov8/ncnn-yolov8s-seg
5.1 Modify yolov8-seg.cpp
(1) Change the output blob names in the detect_yolov8 function:
ncnn::Mat out;
ex.extract("output0", out);
ncnn::Mat mask_proto;
ex.extract("output1", mask_proto);
(2) Save result.jpg in the draw_objects function:
cv::imshow("image", image);
cv::imwrite("result.jpg", image);
cv::waitKey(0);
5.2 Add CMakeLists.txt
cmake_minimum_required(VERSION 3.5)

project(ncnn-yolov8s-seg)

set(CMAKE_BUILD_TYPE Release)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pie -fPIE -fPIC -Wall -O3")

find_package(OpenCV REQUIRED)
if (OpenCV_FOUND)
    message(STATUS "OpenCV_LIBS: ${OpenCV_LIBS}")
    message(STATUS "OpenCV_INCLUDE_DIRS: ${OpenCV_INCLUDE_DIRS}")
else ()
    message(FATAL_ERROR "opencv Not Found!")
endif (OpenCV_FOUND)

find_package(OpenMP REQUIRED)
if (OPENMP_FOUND)
    message("OPENMP FOUND")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
else ()
    message(FATAL_ERROR "OpenMP Not Found!")
endif ()

include_directories(/usr/local/include)
include_directories(/usr/local/include/ncnn)
link_directories(/usr/local/lib)

# Source files
file(GLOB SRC "*.h" "*.cpp")

add_executable(ncnn-yolov8s-seg ${SRC})
target_link_libraries(ncnn-yolov8s-seg ncnn ${OpenCV_LIBS})
5.3 Build ncnn-yolov8s-seg
$ cd ncnn-android-yolov8/ncnn-yolov8s-seg
$ mkdir build && cd build
$ cmake ..
$ make -j$(nproc)
$ cp ncnn-yolov8s-seg ../
$ cd ..
# make sure the yolov8s-seg.param / yolov8s-seg.bin converted in step 4 are in the working directory
$ ./ncnn-yolov8s-seg /home/tianzx/Pictures/coco_sample.png
15 = 0.92688 at 12.03 52.23 305.47 x 420.98
15 = 0.89253 at 344.51 25.41 294.49 x 346.10
65 = 0.84357 at 40.06 73.78 135.51 x 44.37
65 = 0.69806 at 334.26 77.02 35.89 x 111.01
57 = 0.68551 at 1.36 0.81 637.40 x 478.19
Each line is class_id = confidence at x y w x h; in COCO's 80-class list, 15 is cat, 65 is remote, and 57 is couch.
- Input: coco_sample.png
- Output: result.jpg
6. References
- https://github.com/Tencent/ncnn/wiki/how-to-build#pass-for-linux
- PNNX build walkthrough (new): https://zhuanlan.zhihu.com/p/444022507
- Manually optimizing ncnn model structures: https://zhuanlan.zhihu.com/p/93017149
- https://github.com/FeiGeChuanShu/ncnn-android-yolov8
- https://github.com/triple-Mu/YOLOv8-TensorRT
- Model conversion issues: https://github.com/FeiGeChuanShu/ncnn-android-yolov8/issues/1
- ArgMax not supported yet: https://github.com/Tencent/ncnn/issues/2582