579
社区成员
发帖
与我相关
我的任务
分享
#include <stdlib.h>
#include <stdio.h>
#include <cutil_inline.h>
#include <cuda.h>
#include <shrUtils.h>
#include <assert.h>
/*
 * Subtracts 16 in place from one element of arrayIO per thread.
 *
 * Each thread handles the element at its flat 1D global index
 * (blockIdx.x * blockDim.x + threadIdx.x). There is no bounds guard, so the
 * caller must launch no more threads than arrayIO has elements.
 *
 * arrayIO: array that is read and overwritten; must be dereferenceable from
 *          device code.
 */
__global__ void cu_arrayDelete(int* arrayIO)
{
    const int globalId = threadIdx.x + blockDim.x * blockIdx.x;
    int* const slot = arrayIO + globalId;
    *slot -= 16;
}
/*
 * Aborts the program if the CUDA runtime has a pending error.
 *
 * Reads (and thereby clears) the last runtime error with cudaGetLastError().
 * If it is anything other than cudaSuccess, prints
 * "Cuda error: <msg>: <description>." to stderr and exits with EXIT_FAILURE.
 *
 * msg: caller-supplied label naming the operation that was just attempted.
 */
void checkCUDAError(const char *msg)
{
    const cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        return;
    }

    /* The original format string ended in the literal "/n", so the message
     * was never newline-terminated. */
    fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(err));
    exit(EXIT_FAILURE);
}
/*
 * Zero-copy (mapped pinned memory) demo.
 *
 * Allocates a mapped, page-locked host buffer of 128 ints, fills it with 255,
 * runs cu_arrayDelete on the device alias of that buffer (one thread per
 * element), then prints every element from the host side.
 *
 * Returns 0 on success, EXIT_FAILURE if device 0 cannot map host memory.
 * Other CUDA failures are reported through cutilSafeCall / checkCUDAError.
 */
int main(int argc, char *argv[])
{
    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    /* Launch geometry and buffer size are derived from one element count so
     * the unguarded kernel never indexes past the allocation. */
    const int kDevice = 0;
    const int kNumElements = 128;
    const int kThreadsPerBlock = 32;
    const int kNumBlocks = kNumElements / kThreadsPerBlock;
    const size_t kNumBytes = kNumElements * sizeof(int);

    int* h_pData = NULL;
    int* d_pData = NULL;

    cudaDeviceProp deviceProp;
    cutilSafeCall(cudaGetDeviceProperties(&deviceProp, kDevice));
    if (!deviceProp.canMapHostMemory) {
        /* The original passed no argument for %d and carried on regardless. */
        fprintf(stderr, "Device %d cannot map host memory!\n", kDevice);
        return EXIT_FAILURE;
    }

    /* Host-memory mapping has to be enabled before the mapped allocation. */
    cudaSetDeviceFlags(cudaDeviceMapHost);
    checkCUDAError("cudaSetDeviceFlags");

    cutilSafeCall(cudaHostAlloc((void**)&h_pData, kNumBytes, cudaHostAllocMapped));
    cutilSafeCall(cudaHostGetDevicePointer((void**)&d_pData, (void*)h_pData, 0));

    for (int i = 0; i < kNumElements; i++) {
        h_pData[i] = 255;
    }

    cu_arrayDelete<<<kNumBlocks, kThreadsPerBlock>>>(d_pData);
    /* A kernel launch returns no status; pick up launch errors explicitly. */
    checkCUDAError("cu_arrayDelete launch");

    /* Wait for the kernel before the host reads the mapped buffer.
     * cudaThreadSynchronize is kept for compatibility with the cutil-era
     * toolkit; on CUDA 4.0+ its replacement is cudaDeviceSynchronize. */
    cutilSafeCall(cudaThreadSynchronize());

    /* Print each element (the original printed element 0 every time). */
    for (int i = 0; i < kNumElements; i++) {
        printf("%d\n", h_pData[i]);
    }

    /* The original spun in while(1) here and never released the buffer or
     * returned; run from a terminal if the output needs to stay visible. */
    cutilSafeCall(cudaFreeHost(h_pData));
    return 0;
}