但是其 CUDA 源码是有问题的，而且没有提供 CMakeLists.txt。
背景
使用 CUDA 在 GPU 上交换 OpenCV 图像的 R、B 通道。
0. 代码
main.cpp
#include <stdlib.h>
#include <stdio.h>
#include <opencv/cv.h>
#include <opencv/highgui.h>
#include <opencv2/opencv.hpp>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
// Bring the OpenCV names used below (Mat, imread, imshow, waitKey) into scope.
using namespace cv;
// Host-side launcher defined in kernel.cu. It is declared extern "C" so the
// name matches the extern "C" definition there. Both pointers are passed
// straight to a __global__ kernel, so they must be device pointers.
extern "C" void swap_rb_caller(const uchar3* src,uchar3* dst,int width,int height);
int main()
{
Mat image = imread("lena_1.jpg");
imshow("src",image);
size_t memSize1 = image.cols*image.rows*sizeof(uchar3);
int memSize = image.step * image.rows;
uchar3* d_src = NULL;
uchar3* d_dst = NULL;
cudaMalloc((void**)&d_src,memSize);
cudaMalloc((void**)&d_dst,memSize);
cudaMemcpy(d_src,image.data,memSize,cudaMemcpyHostToDevice);
swap_rb_caller(d_src, d_dst, image.cols, image.rows);
cudaMemcpy(image.data,d_dst,memSize,cudaMemcpyDeviceToHost);
imshow("gpu",image);
waitKey(0);
cudaFree(d_src);
cudaFree(d_dst);
return 0;
}
kernel.cu
#include <stdio.h>

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
// Copies a width x height image of packed uchar3 pixels from src to dst,
// exchanging the .x and .z components of every pixel (.y is copied as is).
//
// Expects a 2D launch in which thread (x, y) of the grid handles pixel
// (x, y); pixel (x, y) lives at element y * width + x. Threads that fall
// outside the image do nothing, so the grid may overhang the image.
__global__ void swap_rb_kernel(const uchar3* src,uchar3* dst,int width,int height)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard: threads in the grid's overhang have no pixel to process.
    if (col >= width || row >= height)
        return;

    const int idx = row * width + col;
    const uchar3 in = src[idx];
    dst[idx] = make_uchar3(in.z, in.y, in.x);
}
// Host-side launcher for swap_rb_kernel.
//
// src, dst : pointers handed straight to the kernel, so they must be
//            device-accessible buffers of width * height packed uchar3.
// width, height : image size in pixels; a non-positive size is a no-op.
//
// Launches one thread per pixel in 32x32 blocks on the default stream and
// blocks until the kernel has finished. Failures are reported on stderr.
// The launch error is only peeked, not cleared, so the calling code can
// still observe it through cudaGetLastError().
extern "C"
void swap_rb_caller(const uchar3* src,uchar3* dst,int width,int height)
{
    // An empty image would produce a zero-sized grid, which is an invalid
    // launch configuration; there is nothing to process anyway.
    if (width <= 0 || height <= 0)
        return;

    const dim3 block(32, 32);
    // Ceil-divide so partially filled blocks at the right/bottom edge are
    // still launched; the kernel bounds-checks the overhang.
    const dim3 grid((width + block.x - 1) / block.x,
                    (height + block.y - 1) / block.y);

    swap_rb_kernel<<<grid, block>>>(src, dst, width, height);

    // A kernel launch returns nothing; configuration errors surface here.
    cudaError_t err = cudaPeekAtLastError();
    if (err != cudaSuccess)
    {
        fprintf(stderr, "swap_rb_kernel launch failed: %s\n", cudaGetErrorString(err));
        return;
    }

    // Faults raised while the kernel runs surface at the next sync point.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess)
        fprintf(stderr, "swap_rb_kernel execution failed: %s\n", cudaGetErrorString(err));
}
CMakeLists.txt
cmake_minimum_required(VERSION 2.8.0)
project(demo)

# Default to a Debug build, but do not override a build type the user
# selected on the command line (e.g. -DCMAKE_BUILD_TYPE=Release).
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Debug)
endif()

# OpenCV: headers for main.cpp and the libraries linked below.
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})

# CUDA via the FindCUDA module, which provides cuda_add_executable for
# building a target that mixes .cpp and .cu sources.
find_package(CUDA REQUIRED)
cuda_add_executable(demo main.cpp kernel.cu)
target_link_libraries(demo ${OpenCV_LIBS})
没有积分的,留下邮箱,后续发送
1. 结果