581
社区成员
发帖
与我相关
我的任务
分享
//.cu
#include <cstdio>
#include <iostream>
#include <opencv2/cudev.hpp>
#include <opencv2/core/cuda_stream_accessor.hpp>
#include "opencv2/core/cuda/common.hpp"
using namespace cv;
using namespace cv::cuda;
// Custom kernel: for every pixel inside src's bounds, writes to dst a pixel
// whose first two channels are taken from bg and whose third channel is taken
// from src.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. Threads that fall
// outside src.cols / src.rows do nothing. bg and dst are indexed with the same
// (y, x) as src, so both must be at least as large as src.
//
// The PtrStepSz views are passed BY VALUE: the launch copies the device data
// pointer, step and size into the kernel arguments. Passing a pointer or
// reference to a host-side GpuMat/PtrStepSz would make the kernel dereference
// host memory.
__global__ void run_kernel(const PtrStepSz<uchar3> src, PtrStepSz<uchar3> dst, const PtrStepSz<uchar3> bg)
{
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;

    // The grid is rounded up to whole blocks, so tail threads must bail out.
    if (x >= src.cols || y >= src.rows)
        return;

    const uchar3 pix_bg = bg(y, x);
    const uchar3 pix_src = src(y, x);
    dst(y, x) = make_uchar3(pix_bg.x, pix_bg.y, pix_src.z);
}

// Host wrapper that launches run_kernel over element [0] of each argument and
// waits for it to finish before returning.
//
// NOTE(review): the parameter names do not match how the images are used.
// gpu_bg[0] is the image that is WRITTEN (kernel `dst`) and gpu_dst[0] is the
// background that is READ (kernel `bg`). The visible caller passes
// (src, dst, bg), which lines up with this behavior, so it is kept as is.
//
// Parameters:
//   gpu_src - gpu_src[0] is the 8UC3 source image; its size defines the grid.
//   gpu_bg  - gpu_bg[0] receives the result (see note above).
//   gpu_dst - gpu_dst[0] supplies the first two channels (see note above).
//   stream  - stream the kernel is enqueued on; it is synchronized on return.
//
// Throws cv::Exception (via CV_Assert / cudaSafeCall) on a null pointer, an
// empty or non-8UC3 image, an image smaller than the source, or a CUDA error.
void run(GpuMat* const & gpu_src, GpuMat* const & gpu_bg, GpuMat* const & gpu_dst, Stream& stream)
{
    std::cout << "run {" << std::endl;

    CV_Assert(gpu_src && gpu_bg && gpu_dst);
    const GpuMat& in = gpu_src[0];
    GpuMat& out = gpu_bg[0];          // written by the kernel
    const GpuMat& back = gpu_dst[0];  // read by the kernel as background

    // An empty source would yield a zero-sized grid (invalid launch config).
    CV_Assert(!in.empty());
    // The kernel reinterprets every element as uchar3.
    CV_Assert(in.type() == CV_8UC3 && out.type() == CV_8UC3 && back.type() == CV_8UC3);
    // The kernel indexes all three images with the source's coordinates.
    CV_Assert(out.cols >= in.cols && out.rows >= in.rows);
    CV_Assert(back.cols >= in.cols && back.rows >= in.rows);

    const dim3 block(32, 8);
    const dim3 grid((in.cols + block.x - 1) / block.x, (in.rows + block.y - 1) / block.y);
    printf("grid_xy : (%u,%u)\n", grid.x, grid.y);

    // Build the by-value views on the host; GpuMat itself is not usable in device code.
    const PtrStepSz<uchar3> in_view = in;
    PtrStepSz<uchar3> out_view = out;
    const PtrStepSz<uchar3> back_view = back;

    cudaStream_t cuda_stream = StreamAccessor::getStream(stream);
    run_kernel<<<grid, block, 0, cuda_stream>>>(in_view, out_view, back_view);
    cudaSafeCall(cudaGetLastError());                  // launch-configuration errors
    cudaSafeCall(cudaStreamSynchronize(cuda_stream));  // execution errors; result ready on return

    std::cout << "}" << std::endl;
}
//main.cpp
#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/core/cuda.hpp>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <opencv2/core/cuda.hpp>
#include <opencv2/core/cuda_stream_accessor.hpp>
using namespace cv;
using namespace cv::cuda;
// Defined in the .cu translation unit.
// NOTE(review): as implemented there, the SECOND argument is the image that
// gets written and the THIRD is the background that gets read, despite the
// parameter names used in this declaration.
void run(GpuMat* const &gpu_src, GpuMat* const &gpu_bg, GpuMat* const & gpu_dst, Stream& stream);

// Loads a source image and a background image, runs the GPU kernel that mixes
// their channels, and shows the result in a window.
// Returns 0 on success, 1 if an image cannot be loaded or the sizes are unusable.
int main()
{
    cv::cuda::setDevice(0);

    Mat src = imread("1118.png");
    Mat bg = imread("timg.jpg");
    // imread returns an empty Mat when the file is missing or unreadable.
    if (src.empty() || bg.empty())
    {
        std::cerr << "failed to load 1118.png or timg.jpg" << std::endl;
        return 1;
    }
    // The kernel indexes the background and the output with the source's
    // coordinates, so the source must fit inside the background.
    if (src.cols > bg.cols || src.rows > bg.rows)
    {
        std::cerr << "source image is larger than the background image" << std::endl;
        return 1;
    }
    Mat dst = bg.clone();

    GpuMat gpu_src, gpu_bg, gpu_dst;
    gpu_src.upload(src);
    gpu_bg.upload(bg);
    gpu_dst.upload(dst);

    // Argument order is (src, dst, bg): run() writes through its second
    // argument and reads its third as the background.
    run(&gpu_src, &gpu_dst, &gpu_bg, Stream::Null());

    gpu_dst.download(dst);
    imshow("gpu", dst);
    waitKey(0);
    return 0;
}