this declaration has no storage class or type specifier
刚接触CUDA,一个试手程序出了点问题,麻烦帮助。
//host code
#include <stdio.h>
#include <increment_kernal.cu>
#define N 16
void main()
{
// allocate host memory
int numBytes = N * sizeof(float)
float* h_A = (float*) malloc(numBytes);
// allocate device memory
float* d_A = 0;
cudaMalloc((void**)&d_A, numbytes);
// copy data from host to device
cudaMemcpy(d_A, h_A, numBytes, cudaMemcpyHostToDevice);
// execute the kernel
float blockSize = 4;
float b = 1;
increment_gpu<<< N/blockSize, blockSize>>>(d_A, b, N);
// copy data from device back to host
cudaMemcpy(h_A, d_A, numBytes, cudaMemcpyDeviceToHost);
// free device memory
cudaFree(d_A);
}
//device code
#include <stdio.h>
_global_ void increment_gpu(float *a,float b,int n)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if(idx < n)
a[idx] = a[idx] + b;
}
build之后在_global_ void increment_gpu(float *a,float b,int n)处报错说this declaration has no storage class or type specifier。这是怎么回事?