关于 CUDA 指针传递的问题
// Device-side accumulator: a single double in device memory.
// Starts as NULL so a failed cudaMalloc leaves a recognizable pointer
// instead of an indeterminate one.
double* d_PixelsSum = NULL;
// Host-side mirror of the accumulator. calloc (rather than malloc) zero-fills
// the buffer, so the host reads 0.0 (all-zero bytes on IEEE-754 platforms)
// instead of indeterminate bytes if the later device-to-host copy fails or
// never runs. h_PixelsSum is NULL if the host allocation itself failed.
double* h_PixelsSum = (double*)calloc(1, sizeof(double));
// Allocate the device accumulator and clear it. Both calls return a
// cudaError_t; the first failure is kept in allocStatus instead of being
// discarded, and cudaMemset is skipped when there is no valid allocation.
// cudaMemset fills bytes, which is correct here only because the value is 0.
cudaError_t allocStatus = cudaMalloc((void **)&d_PixelsSum, sizeof(double));
if (allocStatus == cudaSuccess) {
    allocStatus = cudaMemset(d_PixelsSum, 0, sizeof(double));
}
// Setup succeeded only if allocStatus == cudaSuccess and h_PixelsSum != NULL;
// cudaGetErrorString(allocStatus) describes a CUDA-side failure.
// Kernel meant to accumulate the sum of absolute pixel differences between two
// 8-bit buffers into dPixelsSum[0]. The accumulation is currently disabled and
// the kernel only stores a fixed probe value so the host can verify that the
// device write and the copy back work.
//
// pSrcData8bit, pSrcDataTemp8bit: 8-bit input buffers (not read while the
//     probe is active).
// dPixelsSum: location the result is written through. It must be a device
//     pointer; a host pointer passed to a kernel is an illegal address.
__global__ static void GpuImageDiff8bit(unsigned char* pSrcData8bit, unsigned char* pSrcDataTemp8bit, double* dPixelsSum) {
    // Disabled accumulation. A plain `dPixelsSum[0] += ...` executed by every
    // thread is a data race on a single address and loses updates; the real
    // version needs an atomic (atomicAdd on double requires SM60+) and a bounds
    // check on the index, for example:
    //   const unsigned long offset = blockIdx.x * THREAD_N + threadIdx.x;
    //   atomicAdd(dPixelsSum, fabs((double)pSrcData8bit[offset] - (double)pSrcDataTemp8bit[offset]));

    // Debug probe. One thread is enough to store the constant; the final value
    // is the same as when every thread stored it, without the redundant
    // concurrent writes to one address.
    if (blockIdx.x == 0 && threadIdx.x == 0) {
        dPixelsSum[0] = 1000000; // `*dPixelsSum = 1000000` is equivalent
    }
}
// Copy the result back to the host.
// A kernel launch returns no status of its own: an invalid launch
// configuration is reported by cudaGetLastError(), and a fault inside the
// kernel surfaces at the next synchronizing call, which here is the blocking
// cudaMemcpy. Discarding both statuses hides exactly the failures that leave
// the host buffer unmodified.
cudaError_t copyStatus = cudaGetLastError();
if (copyStatus == cudaSuccess) {
    copyStatus = cudaMemcpy(h_PixelsSum, d_PixelsSum, sizeof(double), cudaMemcpyDeviceToHost);
}
// Trust the host buffer only when the copy succeeded. On failure dTemp is 0
// and cudaGetErrorString(copyStatus) names the cause.
double dTemp = (copyStatus == cudaSuccess) ? *h_PixelsSum : 0.0; // expected 1000000; the post reports a different value
问题:为什么拷贝出来的数据不是 1000000?是哪里写错了,还是初始化不对?