580
社区成员
发帖
与我相关
我的任务
分享
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <string>
#include <opencv2/opencv.hpp>
#include <opencv/cv.h>
#include <opencv/highgui.h>
#include "cuda.h"
// Edge length of the square thread block; main() uses it both for the block
// dimensions and for the ceil-division that sizes the grid.
#define DIM 16
using namespace cv;
// 1D float texture references. main() binds texR/texG/texB to the device
// arrays holding the red/green/blue planes, and kernel() reads them with
// tex1Dfetch.
texture<float> texR;
texture<float> texG;
texture<float> texB;
// NOTE(review): texOut is not bound or fetched anywhere in the visible code.
texture<float> texOut;
/*
 * Blend three float planes into one: for every element, fetch the red, green
 * and blue values through the texR/texG/texB texture references and store
 * 0.5*r + 0.5*g + 0.5*b in dev_out. The weights sum to 1.5, so the result can
 * exceed the range of the inputs.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per element. The flat
 * element index is x + y * (blockDim.x * gridDim.x), i.e. the row pitch is
 * the total number of threads launched along x.
 *
 * Precondition: there is no bounds guard, so dev_out and the memory bound to
 * each texture must hold at least
 * (blockDim.x * gridDim.x) * (blockDim.y * gridDim.y) floats.
 *
 * dev_out: device pointer receiving the blended values.
 */
__global__ void kernel(float *dev_out)
{
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;
    const int pitch = blockDim.x * gridDim.x;
    const int offset = x + y * pitch;

    const float red = tex1Dfetch(texR, offset);
    const float green = tex1Dfetch(texG, offset);
    const float blue = tex1Dfetch(texB, offset);

    // 0.5f (not 0.5) keeps the arithmetic in single precision; a bare 0.5 is a
    // double literal and would promote every operation to double.
    dev_out[offset] = red * 0.5f + green * 0.5f + blue * 0.5f;
}
/* Abort with a readable message when a CUDA runtime call fails. */
static void checkCuda(cudaError_t status, const char *expr, int line)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error at line %d: %s: %s\n",
                line, expr, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}
#define CUDA_CHECK(call) checkCuda((call), #call, __LINE__)

/* Allocate `count` floats on the host or abort. */
static float *allocHostPlane(size_t count)
{
    float *plane = (float*)malloc(sizeof(float) * count);
    if (plane == NULL) {
        fprintf(stderr, "out of host memory (%lu floats)\n", (unsigned long)count);
        exit(EXIT_FAILURE);
    }
    return plane;
}

/*
 * Load an image, split it into float R/G/B planes, blend the planes on the
 * GPU with kernel() and on the CPU, print both timings, and display the GPU
 * result in a window for up to 10 seconds.
 *
 * Returns 0 on success, 1 if the image cannot be loaded; CUDA or allocation
 * failures terminate the process with EXIT_FAILURE.
 */
int main()
{
    // image path
    const char *path = "//home//ubunserver//yoona.jpg";
    // Flag 1 requests a 3-channel BGR image. The planes below read three
    // channels per pixel, so a grayscale load (flag 0) would be wrong.
    const int kLoadColor = 1;
    IplImage *image = cvLoadImage(path, kLoadColor);
    if (image == NULL) {
        fprintf(stderr, "failed to load image: %s\n", path);
        return 1;
    }
    const int width = image->width;
    const int height = image->height;
    const size_t imagesize = (size_t)width * (size_t)height;
    if (imagesize == 0) {
        fprintf(stderr, "image is empty: %s\n", path);
        cvReleaseImage(&image);
        return 1;
    }

    // Launch geometry. kernel() has no bounds guard and indexes rows with a
    // pitch of gridDim.x * blockDim.x, so the device buffers are padded to
    // whole tiles and use exactly that pitch.
    const dim3 threads(DIM, DIM);
    const dim3 blocks((width + DIM - 1) / DIM, (height + DIM - 1) / DIM);
    const size_t pitchElems = (size_t)blocks.x * DIM;
    const size_t paddedRows = (size_t)blocks.y * DIM;
    const size_t devPitch = pitchElems * sizeof(float);
    const size_t devBytes = devPitch * paddedRows;
    const size_t hostPitch = (size_t)width * sizeof(float);

    // r, g, b and out are device-side arrays
    float *r = NULL, *g = NULL, *b = NULL;
    float *out = NULL;
    CUDA_CHECK(cudaMalloc((void**)&r, devBytes));
    CUDA_CHECK(cudaMalloc((void**)&g, devBytes));
    CUDA_CHECK(cudaMalloc((void**)&b, devBytes));
    CUDA_CHECK(cudaMalloc((void**)&out, devBytes));
    // Give the padding texels a defined value; they are read by the threads
    // that fall outside the image.
    CUDA_CHECK(cudaMemset(r, 0, devBytes));
    CUDA_CHECK(cudaMemset(g, 0, devBytes));
    CUDA_CHECK(cudaMemset(b, 0, devBytes));

    // texR, texG and texB are texture references over the device arrays
    CUDA_CHECK(cudaBindTexture(NULL, texR, r, devBytes));
    CUDA_CHECK(cudaBindTexture(NULL, texG, g, devBytes));
    CUDA_CHECK(cudaBindTexture(NULL, texB, b, devBytes));

    // GPU-side timer
    cudaEvent_t sta, sto;
    CUDA_CHECK(cudaEventCreate(&sta));
    CUDA_CHECK(cudaEventCreate(&sto));

    // host-side arrays (tightly packed, row pitch == width)
    float *host_r = allocHostPlane(imagesize);
    float *host_g = allocHostPlane(imagesize);
    float *host_b = allocHostPlane(imagesize);
    float *host_out = allocHostPlane(imagesize);   // GPU result
    float *host_cpu = allocHostPlane(imagesize);   // CPU reference result

    // Split the interleaved BGR pixels into separate planes. Rows are
    // widthStep bytes apart (they may be padded) and each pixel is 3 bytes.
    for (int row = 0; row < height; row++) {
        const uchar *src = (const uchar*)(image->imageData + (size_t)row * image->widthStep);
        for (int col = 0; col < width; col++) {
            const size_t i = (size_t)row * width + col;
            host_b[i] = (float)src[3 * col + 0];
            host_g[i] = (float)src[3 * col + 1];
            host_r[i] = (float)src[3 * col + 2];
        }
    }

    // Timed GPU section: uploads, kernel, download.
    CUDA_CHECK(cudaEventRecord(sta, 0));
    CUDA_CHECK(cudaMemcpy2D(r, devPitch, host_r, hostPitch, hostPitch, height, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy2D(g, devPitch, host_g, hostPitch, hostPitch, height, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy2D(b, devPitch, host_b, hostPitch, hostPitch, height, cudaMemcpyHostToDevice));
    // Zero the output on the device (a cudaMemcpy from a NULL host pointer is an error).
    CUDA_CHECK(cudaMemset(out, 0, devBytes));
    // Execution configuration is <<<grid, block>>>.
    kernel<<<blocks, threads>>>(out);
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaMemcpy2D(host_out, hostPitch, out, devPitch, hostPitch, height, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaEventRecord(sto, 0));
    CUDA_CHECK(cudaEventSynchronize(sto));
    float elapsedTime = 0.0f;
    CUDA_CHECK(cudaEventElapsedTime(&elapsedTime, sta, sto));
    printf("cuda time: %3.1f ms\n", elapsedTime);

    CUDA_CHECK(cudaUnbindTexture(texR));
    CUDA_CHECK(cudaUnbindTexture(texG));
    CUDA_CHECK(cudaUnbindTexture(texB));
    CUDA_CHECK(cudaFree(r));
    CUDA_CHECK(cudaFree(g));
    CUDA_CHECK(cudaFree(b));
    CUDA_CHECK(cudaFree(out));
    CUDA_CHECK(cudaEventDestroy(sta));
    CUDA_CHECK(cudaEventDestroy(sto));

    // CPU-side timing. The result goes to its own buffer so the GPU output
    // that is displayed below is not overwritten.
    clock_t start, stop;
    start = clock();
    for (int i = 0; i < height; i++) {
        for (int j = 0; j < width; j++) {
            const size_t k = (size_t)i * width + j;
            host_cpu[k] = host_r[k] * 0.5 + host_g[k] * 0.5 + 0.5 * host_b[k];
        }
    }
    stop = clock();
    printf("time: %3.1f ms\n", double(stop - start) / CLOCKS_PER_SEC * 1000);

    // Compare the two results; small rounding differences are expected.
    float maxDiff = 0.0f;
    for (size_t k = 0; k < imagesize; k++) {
        float diff = host_out[k] - host_cpu[k];
        if (diff < 0.0f) diff = -diff;
        if (diff > maxDiff) maxDiff = diff;
    }
    printf("max |gpu-cpu| difference: %g\n", maxDiff);

    // display the image
    IplImage *grey_image = cvCreateImage(cvSize(width, height), IPL_DEPTH_8U, 1);
    if (grey_image == NULL) {
        fprintf(stderr, "failed to allocate output image\n");
        exit(EXIT_FAILURE);
    }
    for (int i = 0; i < height; i++) {
        // Output rows are widthStep bytes apart, which may exceed width.
        uchar *ptr = (uchar*)(grey_image->imageData + (size_t)i * grey_image->widthStep);
        for (int j = 0; j < width; j++) {
            // The blend can reach 1.5 * 255, so clamp before narrowing; an
            // out-of-range float -> uchar conversion is undefined.
            const float v = host_out[(size_t)i * width + j];
            ptr[j] = (v <= 0.0f) ? (uchar)0 : (v >= 255.0f) ? (uchar)255 : (uchar)v;
        }
    }
    cvNamedWindow("w1");
    cvShowImage("w1", grey_image);
    cvWaitKey(10000);
    cvDestroyAllWindows();
    cvReleaseImage(&grey_image);
    cvReleaseImage(&image);
    free(host_r);
    free(host_g);
    free(host_b);
    free(host_out);
    free(host_cpu);
    return 0;
}