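// Forum page navigation residue (not part of the program):
// 353 community members | Post | Related to me | My tasks | Share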
/**
 * For every cell (x, y) of a width x height grid, accumulates
 *     sum over i in [0, height) of |d_query[y] - d_ref[i*width + x]|
 * and stores the result in d_dist[y*width + x].
 *
 * Launch layout: 2D grid of 2D blocks; the x dimension indexes columns
 * (0..width-1) and the y dimension indexes rows (0..height-1). Threads that
 * fall outside the width x height range do nothing, so the grid may overhang
 * the data.
 *
 * No shared memory is used, so no block-level barrier is needed. The original
 * code called __syncthreads() inside the bounds-check branch, which is a
 * barrier in divergent code whenever the grid overhangs the data; it has been
 * removed.
 *
 * NOTE(review): the query value is fixed at d_query[y] for the whole loop
 * (it is not indexed by i) - confirm this is the intended distance.
 * NOTE(review): a second kernel with the same name and signature is defined
 * later in this file; only one of the two can be compiled in at a time.
 *
 * @param d_dist   output array, written at index y*width + x
 * @param d_ref    input array, read at index i*width + x
 * @param d_query  input array, read at index y
 * @param width    number of columns
 * @param height   number of rows (also the loop trip count)
 */
__global__ void computeDistanceKernel(double * d_dist, double * d_ref, double * d_query, int width, int height)
{
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Out-of-range threads leave early; safe because no barrier follows.
    if (x >= (unsigned int)width || y >= (unsigned int)height)
    {
        return;
    }

    const double query = d_query[y];
    double dist = 0;
    for (int i = 0; i < height; i++)
    {
        // size_t index arithmetic avoids 32-bit wrap-around on large inputs.
        const double ref = d_ref[(size_t)i * width + x];
        dist += fabs(query - ref);
    }

    d_dist[(size_t)y * width + x] = dist;
}
/**
 * Variant of the distance kernel with the result store disabled.
 *
 * Each in-range thread (x, y) accumulates
 *     sum over i in [0, height) of |d_query[y] - d_ref[i*width + x]|
 * into a local variable, but the write to d_dist is commented out, so this
 * kernel has no observable output.
 *
 * NOTE(review): because the accumulated value is never stored, the compiler
 * is free to drop the loop as dead code; timings of this variant therefore
 * probably do not reflect the cost of the computation - confirm against the
 * generated PTX/SASS.
 * NOTE(review): this definition has the same name and signature as the kernel
 * defined earlier in this file; only one of the two can be compiled in at a
 * time.
 *
 * The original code called __syncthreads() inside the bounds-check branch.
 * No shared memory is used, and a barrier in divergent code is invalid
 * whenever the grid overhangs the data, so it has been removed.
 *
 * @param d_dist   output array (currently unused: the store is disabled)
 * @param d_ref    input array, read at index i*width + x
 * @param d_query  input array, read at index y
 * @param width    number of columns
 * @param height   number of rows (also the loop trip count)
 */
__global__ void computeDistanceKernel(double * d_dist, double * d_ref, double * d_query, int width, int height)
{
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Out-of-range threads leave early; safe because no barrier follows.
    if (x >= (unsigned int)width || y >= (unsigned int)height)
    {
        return;
    }

    const double query = d_query[y];
    double dist = 0;
    for (int i = 0; i < height; i++)
    {
        // size_t index arithmetic avoids 32-bit wrap-around on large inputs.
        const double ref = d_ref[(size_t)i * width + x];
        dist += fabs(query - ref);
    }

    // Store intentionally left disabled, as in the original experiment.
    //d_dist[y*width+x] = dist;
}
/**
 * Host driver: fills a width x height reference matrix and a height-long
 * query vector with pseudo-random values in [0, 9], uploads them to the
 * device, launches computeDistanceKernel once on a 16x16-thread block grid,
 * and prints the elapsed wall time of the launch (including the
 * synchronization that follows it).
 *
 * Returns 0 on completion; CUDA/CUTIL failures are handled by the
 * cutilSafeCall / cutilCheckError helpers.
 */
int main()
{
    const int width = 10000;
    const int height = 400;

    // Element counts in size_t so byte sizes cannot overflow int.
    const size_t refCount = (size_t)width * height;
    const size_t queryCount = (size_t)height;

    double * h_ref = new double[refCount];
    double * h_query = new double[queryCount];
    for (size_t i = 0; i < refCount; i++)
    {
        h_ref[i] = rand() % 10;
    }
    for (size_t i = 0; i < queryCount; i++)
    {
        h_query[i] = rand() % 10;
    }

    double *d_ref, *d_query;
    double *d_dist, *d_dist2;
    cutilSafeCall( cudaMalloc( (void **) &d_ref, refCount*sizeof(double)) );
    cutilSafeCall( cudaMemcpy(d_ref, h_ref, refCount*sizeof(double), cudaMemcpyHostToDevice) );
    cutilSafeCall( cudaMalloc( (void **) &d_query, queryCount*sizeof(double)) );
    cutilSafeCall( cudaMemcpy(d_query, h_query, queryCount*sizeof(double), cudaMemcpyHostToDevice) );
    cutilSafeCall( cudaMalloc( (void **) &d_dist, refCount*sizeof(double)));
    // d_dist2 is allocated but not used by the launch below; it is released at the end.
    cutilSafeCall( cudaMalloc( (void **) &d_dist2, (size_t)width*sizeof(double)));

    // Start timing
    unsigned int timer = 0;
    cutilCheckError( cutCreateTimer( &timer));
    cutilCheckError( cutStartTimer( timer));

    // Ceil-divide both axes so partial tiles are covered. The original bumped
    // dimGrid.x twice and never dimGrid.y when height was not a multiple of 16.
    const int blockSide = 16;
    dim3 dimBlock(blockSide, blockSide, 1);
    dim3 dimGrid((width + blockSide - 1) / blockSide,
                 (height + blockSide - 1) / blockSide,
                 1);

    computeDistanceKernel<<<dimGrid, dimBlock>>>(d_dist, d_ref, d_query, width, height);
    // Launch-configuration errors are only reported through cudaGetLastError().
    cutilSafeCall(cudaGetLastError());
    // Wait for the kernel so the timer covers its execution.
    cutilSafeCall(cudaThreadSynchronize());

    // Stop timing
    cutilCheckError( cutStopTimer( timer));
    printf("computeDifferenceKernel Processing time: %f (ms)\n", cutGetTimerValue( timer));
    cutilCheckError( cutDeleteTimer( timer));

    // Release every device allocation (d_dist2 was leaked originally).
    cutilSafeCall(cudaFree(d_dist));
    cutilSafeCall(cudaFree(d_dist2));
    cutilSafeCall(cudaFree(d_ref));
    cutilSafeCall(cudaFree(d_query));

    // Release host buffers (leaked originally).
    delete[] h_ref;
    delete[] h_query;

    return 0;
}