581
社区成员
发帖
与我相关
我的任务
分享
#include <stdio.h>
#include <stdlib.h>
#include <cutil_inline.h>
/*
 * Copies one byte per thread from d_HelloCUDA to result.
 *
 * Launch layout: 1D grid of 1D blocks (only the .x components are used).
 * Each thread handles the element at its global index
 * blockIdx.x * blockDim.x + threadIdx.x.
 *
 * Preconditions: the kernel performs no bounds check, so both buffers must
 * hold at least gridDim.x * blockDim.x bytes. No shared memory is used.
 *
 * result       destination buffer (written at the thread's global index)
 * d_HelloCUDA  source buffer (read at the thread's global index)
 */
__global__ static void HelloCUDA(char* result, char *d_HelloCUDA)
{
    // Use the real block size rather than a hard-coded 5 so the index is
    // correct for any launch configuration, not just <<<N, 5>>>.
    int tid = blockIdx.x * blockDim.x + threadIdx.x;
    result[tid] = d_HelloCUDA[tid];
}
/*
 * Uploads a 62-character string to the device, launches HelloCUDA with
 * 2 blocks of 5 threads (10 threads, one byte each), copies the result
 * buffer back and prints it as a C string.
 *
 * Because only 10 threads run, only the first 10 characters are copied by
 * the kernel; the rest of the result buffer is explicitly zeroed so the
 * printed string is well defined.
 *
 * Returns 0 on success. CUDA API failures are reported through
 * cutilSafeCall / cutilCheckMsg.
 */
int main(int argc, char* argv[])
{
    // Launch geometry: kNumBlocks * kThreadsPerBlock threads, one byte per thread.
    const int kNumBlocks = 2;
    const int kThreadsPerBlock = 5;

    char *device_result = 0;
    char *d_HelloCUDA = 0;
    char host_result[63] = {0};
    char h_HelloCUDA[] = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    cutilSafeCall( cudaMalloc((void**) &device_result, sizeof(host_result)));
    cutilSafeCall( cudaMalloc((void**) &d_HelloCUDA, sizeof(h_HelloCUDA)));

    // cudaMalloc does not clear the allocation. The kernel writes only the
    // first kNumBlocks * kThreadsPerBlock bytes, so zero the whole buffer to
    // avoid copying back (and printing) uninitialized device memory.
    cutilSafeCall( cudaMemset(device_result, 0, sizeof(host_result)));

    cutilSafeCall( cudaMemcpy(d_HelloCUDA, h_HelloCUDA, sizeof(h_HelloCUDA), cudaMemcpyHostToDevice));

    HelloCUDA<<<kNumBlocks, kThreadsPerBlock, 0>>>(device_result, d_HelloCUDA);
    cutilCheckMsg("Kernel execution failed\n");

    // Check the synchronization result too: faults raised while the kernel
    // runs are reported here, not at launch time.
    cutilSafeCall( cudaThreadSynchronize());

    // Copy the full buffer; the trailing bytes are zero, so host_result is
    // always NUL-terminated.
    cutilSafeCall( cudaMemcpy(host_result, device_result, sizeof(host_result), cudaMemcpyDeviceToHost));
    printf("%s\n", host_result);

    // Release both device allocations (the input buffer was previously leaked).
    cutilSafeCall( cudaFree(device_result));
    cutilSafeCall( cudaFree(d_HelloCUDA));
    return 0;
}