579
社区成员
发帖
与我相关
我的任务
分享
#include <thrust/scan.h>
#include <thrust/execution_policy.h>
__global__
void Kernel(int* dv)
{
int i = threadIdx.x;
dv[i] = i;
}
int main()
{
int* dv;
cudaMalloc((void**)&dv, 10*sizeof(int));
Kernel<<<1,10>>>(dv);
thrust::exclusive_scan(thrust::device, dv, dv+10, dv);
int* hv = new int[10];
cudaMemcpy(hv, dv, 10*sizeof(int), cudaMemcpyDeviceToHost);
for (int i=0; i<10; ++i)
{
printf("%d\n",hv[i]);
}
int sum = thrust::reduce(thrust::device, dv, dv+10, 0, thrust::plus<int>());
printf("%d\n", sum);
return 0;
}