最近正在学习cuda,试着写了一个程序,大意就是随便输入一个不大于10^9的数字,统计从1到该数字之间的所有整数共包含多少个0,1,2,3,4,5,6,7,8和9。我的思路是定义一个包含10个元素的整型数组并初始化为0,然后利用除模取余的方法进行穷举。并行进程100个。但是在执行后,屏幕闪了一下,然后就输出全是0的结果。请论坛的大神给看看是哪里的问题。
代码如下:
#include<iostream>
#include<time.h>
using namespace std;
#define N 100
__global__ void calculate(int dev_page, int *count){
int tid = blockIdx.x;
int temp, num;
for (int i = tid + 1; i <= dev_page-N+i; i += N){
temp = i;
while (temp != 0){
num = temp % 10;
count[num]++;
temp /= 10;
}
}
}
int main(void){
long start = clock(), finish;
int count[10] = {0,0,0,0,0,0,0,0,0,0};
int *dev_count;
int final_page;
cout << "Input a number:";
cin >> final_page;
cudaMalloc((void**)&dev_count,10*sizeof(int));
cudaMemcpy(dev_count,count,10*sizeof(int),cudaMemcpyHostToDevice);
calculate << <N, 1 >> >(final_page,dev_count);
cudaMemcpy(count, dev_count, 10 * sizeof(int), cudaMemcpyDeviceToHost);
for (int i = 0; i < 10; i++)
cout << "页码中包含的" << i << "有" << count[i] << "个。\n";
cudaFree(dev_count);
finish = clock();
cout << "运行时间" << (finish - start) << "ms." << "\n";
system("pause");
return 0;
}