CUDA二维矩阵加法运算
我在GPU上实现二维矩阵加法运算时,代码如下,可以运行
void main()
{
float (*a)[4]=new float[2][4];
float (*b)[4]=new float[2][4];
float (*dev_a)[4],(*dev_b)[4];
a[0][0]=1;a[0][1]=2;a[0][2]=3;a[0][3]=4;
a[1][0]=5;a[1][1]=6;a[1][2]=7;a[1][3]=8;
cudaMalloc((void **)&dev_a, 2 * 4 * sizeof(float) );
cudaMalloc((void **)&dev_b, 2 * 4 * sizeof(float) );
cudaMemcpy( dev_a, a, 2 * 4 * sizeof(float), cudaMemcpyHostToDevice );
delete []a;
dim3 blocks(1,1);
dim3 threads(2,4);
add<<<blocks,threads>>>(dev_a,dev_b);
cudaMemcpy( b, dev_b, 2 * 4 * sizeof(float), cudaMemcpyDeviceToHost );
cudaFree(dev_a);
cudaFree(dev_b);
cout<<b[0][0]<<endl;cout<<b[0][1]<<endl;cout<<b[0][2]<<endl;cout<<b[0][3]<<endl;
cout<<b[1][0]<<endl;cout<<b[1][1]<<endl;cout<<b[1][2]<<endl;cout<<b[1][3]<<endl;
delete []b;
}
__global__ void add(float m[][4],float n[][4])
{
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
if(x<2 && y<4)
n[x][y]=m[x][y]+10;
}
问题来了:在GPU上第一次运行生成后,dev_b值变成固定的了,当改变if(x<-2 && y<-4)或者屏蔽掉add<<<blocks,threads>>>(dev_a,dev_b)时dev_b的值依然不变,好像dev_b第一次运行生成的值被写进内存无法更改了似的,请问这是什么问题?
谢谢指教!!