581
社区成员
发帖
与我相关
我的任务
分享
// Element-wise kernel over three int arrays.
//
// Launch layout: 1-D grid of 1-D blocks; each thread handles the single
// element at flat index blockIdx.x * blockDim.x + threadIdx.x.
// The thread stores a[i] * b[i] into c[i] and then adds a[i] * b[i] to it
// again, so for non-overlapping buffers c[i] ends up as 2 * a[i] * b[i].
//
// Preconditions:
//   - c, a and b are dereferenced on the device, so they must be
//     device-accessible pointers.
//   - There is no bounds check: the launch must not create more threads
//     than the arrays have elements.
__global__ void addKernel(int *c, const int *a, const int *b)
{
    // Flat global index of the element owned by this thread.
    const int element = blockIdx.x * blockDim.x + threadIdx.x;

    // Output slot for this thread; written once, then accumulated into.
    int *const out = c + element;
    *out = a[element] * b[element];
    *out += a[element] * b[element];
}
// Edge length of the square host matrices built in main(). main() also uses
// it as both the grid size and the block size of the addKernel launch, which
// yields arraySize * arraySize threads, one per matrix element.
const int arraySize = 100;
// Reports a failed CUDA runtime call on stdout.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaCallOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    cout << what << " failed: " << cudaGetErrorString(status) << endl;
    return false;
}

// Fills two arraySize x arraySize host matrices, runs addKernel over them on
// GPU 0 and prints the clock() ticks spent on the GPU path (device
// allocation, both uploads, kernel, download). It then runs a CPU loop over
// the same inputs and prints the clock() ticks that loop took.
//
// Returns 0 on success and 1 if any CUDA runtime call reported an error; on
// a failure in the GPU path, neither timing is printed.
int main()
{
    // Static storage keeps the four matrices (160 KB in total at
    // arraySize = 100) off the stack.
    static int a[arraySize][arraySize];
    static int b[arraySize][arraySize];
    static int c[arraySize][arraySize];
    static int c1[arraySize][arraySize];

    // Every element of row i holds i in `a` and 2*i in `b`.
    for (int i = 0; i < arraySize; i++)
    {
        for (int j = 0; j < arraySize; j++)
        {
            a[i][j] = i;
            b[i][j] = 2 * i;
        }
    }

    // Size in bytes of one matrix; shared by every allocation and copy below.
    const size_t bytes = arraySize * arraySize * sizeof(int);

    // Choose which GPU to run on, change this on a multi-GPU system.
    bool ok = cudaCallOk(cudaSetDevice(0), "cudaSetDevice");

    clock_t t1 = clock();
    int *dev_a = 0;
    int *dev_b = 0;
    int *dev_c = 0;

    // Allocate GPU buffers for three matrices (two input, one output).
    // `ok &&` short-circuits, so nothing further is attempted after the
    // first failure.
    ok = ok && cudaCallOk(cudaMalloc((void**)&dev_c, bytes), "cudaMalloc(dev_c)");
    ok = ok && cudaCallOk(cudaMalloc((void**)&dev_a, bytes), "cudaMalloc(dev_a)");
    ok = ok && cudaCallOk(cudaMalloc((void**)&dev_b, bytes), "cudaMalloc(dev_b)");

    // Copy input matrices from host memory to GPU buffers.
    ok = ok && cudaCallOk(cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(dev_a)");
    ok = ok && cudaCallOk(cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(dev_b)");

    if (ok)
    {
        // Launch a kernel on the GPU with one thread for each element:
        // arraySize blocks of arraySize threads.
        addKernel<<<arraySize, arraySize>>>(dev_c, dev_a, dev_b);
        // A kernel launch returns nothing; launch errors are picked up here.
        ok = cudaCallOk(cudaGetLastError(), "addKernel launch");
    }

    // cudaDeviceSynchronize waits for the kernel to finish, and returns
    // any errors encountered while it ran.
    ok = ok && cudaCallOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

    // Copy output matrix from GPU buffer to host memory.
    ok = ok && cudaCallOk(cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(c)");
    clock_t t2 = clock();

    if (ok)
    {
        // GPU path duration in raw clock() ticks.
        cout<<"time:"<<t2-t1<<endl;

        // NOTE(review): this loop evaluates a + b + 3*a*b, while the two
        // statements in addKernel store 2*a*b, and c1 is never compared with
        // c -- the two timings cover different work. Confirm which
        // expression is intended before relying on the comparison.
        for (int i = 0; i < arraySize; i++)
        {
            for (int j = 0; j < arraySize; j++)
            {
                c1[i][j] = a[i][j] + b[i][j];
                c1[i][j] += a[i][j] * b[i][j];
                c1[i][j] += a[i][j] * b[i][j];
                c1[i][j] += a[i][j] * b[i][j];
            }
        }

        // CPU loop duration in raw clock() ticks.
        t1 = clock();
        cout<<"time:"<<t1-t2<<endl;
    }

    // Always release device memory; cudaFree on a pointer that is still 0
    // performs no operation, so this is safe after a partial failure.
    cudaFree(dev_c);
    cudaFree(dev_a);
    cudaFree(dev_b);

    // Reset runs unconditionally; its status is folded into the exit code.
    ok = cudaCallOk(cudaDeviceReset(), "cudaDeviceReset") && ok;
    return ok ? 0 : 1;
}