581
社区成员
发帖
与我相关
我的任务
分享
// Host-side driver sequence for the sumOfSquares kernel: allocate device
// buffers, upload the input, launch the kernel, and download the results.
//
// NOTE(review): none of the CUDA runtime return codes below are checked, and
// the launch is not followed by cudaGetLastError(); a failure in any step
// would go unnoticed here. Consider wrapping these calls in the project's
// error-checking helper, if one exists.

// Allocate device memory.
// gpudata: DATA_SIZE ints of input.
cudaMalloc((void**) &gpudata, sizeof(int) * DATA_SIZE);
// result: THREAD_NUM * BLOCK_NUM ints, i.e. sized for one int per launched thread.
cudaMalloc((void**) &result, sizeof(int) * THREAD_NUM * BLOCK_NUM);
// time: BLOCK_NUM * 2 clock_t values, i.e. sized for two per block
// (presumably start/end timestamps -- confirm against the kernel).
cudaMalloc((void**) &time, sizeof(clock_t) * BLOCK_NUM * 2);

// Upload the host input array to the device.
cudaMemcpy(gpudata, data, sizeof(int) * DATA_SIZE, cudaMemcpyHostToDevice);

// Launch with BLOCK_NUM blocks of THREAD_NUM threads and no dynamic shared memory.
sumOfSquares<<<BLOCK_NUM, THREAD_NUM, 0>>>(gpudata, result, time);

// Download the results. cudaMemcpy on the default stream is blocking, so
// these copies also wait for the kernel above to finish.
// NOTE(review): `&sum` and `&time_used` are only correct if `sum` is a host
// array of at least THREAD_NUM * BLOCK_NUM ints and `time_used` a host array
// of at least BLOCK_NUM * 2 clock_t; if either is a scalar, these copies
// overflow it. Their declarations are not visible here -- verify.
cudaMemcpy(&sum, result, sizeof(int) * THREAD_NUM * BLOCK_NUM, cudaMemcpyDeviceToHost);
cudaMemcpy(&time_used, time, sizeof(clock_t) * BLOCK_NUM * 2, cudaMemcpyDeviceToHost);