/* Forum page residue, not part of the program: "589 community members". */




#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <cutil.h>
/************************************************************************/
/* Init CUDA */
/************************************************************************/
/*
 * Build-mode dependent declarations.
 * When __DEVICE_EMULATION__ is non-zero only a stub InitCUDA() that always
 * reports success is defined; otherwise the volume dimensions and the device
 * pointer used by the kernel are defined.
 * NOTE(review): SIZE_X/SIZE_Y/SIZE_Z and d_val exist only in the #else
 * branch, yet the code below uses them unconditionally - confirm whether an
 * emulation build is still expected to compile.
 */
#if __DEVICE_EMULATION__
bool InitCUDA(void){return true;}
#else
/* Extent of the volume along each axis, in elements. */
#define SIZE_X 8
#define SIZE_Y 8
#define SIZE_Z 8
__device__ float* d_val; // Device-side global pointer; the kernel stores its computed values through it.
#endif
/************************************************************************/
/* Example */
/************************************************************************/
/*
 * HelloCUDA: writes the constant 56 into every element of the
 * SIZE_X x SIZE_Y x SIZE_Z volume addressed through d_val.
 *
 * Launch layout: 2D grid of 2D blocks. The x and y axes are covered by
 * grid-stride loops, so any grid/block shape visits every (x, y) position;
 * each thread iterates over all z slices itself.
 * Precondition: d_val must hold the address of a buffer with room for at
 * least SIZE_X*SIZE_Y*SIZE_Z floats.
 */
__global__ static void HelloCUDA()
{
    // First (x, y) position owned by this thread and the distance to its next one.
    const int firstX  = threadIdx.x + blockIdx.x * blockDim.x;
    const int firstY  = threadIdx.y + blockIdx.y * blockDim.y;
    const int strideX = blockDim.x * gridDim.x;
    const int strideY = blockDim.y * gridDim.y;

    for (int slice = 0; slice < SIZE_Z; ++slice)
    {
        const int sliceBase = SIZE_X * SIZE_Y * slice;
        for (int row = firstY; row < SIZE_Y; row += strideY)
        {
            for (int col = firstX; col < SIZE_X; col += strideX)
            {
                // Placeholder value; the real computation is still disabled:
                /*computePower(d_value,EQUIP_NUM,x,y,z);*/
                d_val[col + SIZE_X * row + sliceBase] = 56;
            }
        }
    }
}
/*
 * Host-side owner of the result buffer.
 *
 * d_val is a __device__ symbol: host code must not read it directly, and
 * handing &d_val to cudaMalloc only changes the host-side shadow, leaving the
 * pointer the kernel dereferences unset. The buffer is therefore allocated
 * through an ordinary host variable (s_devBuffer) and its address is copied
 * into the symbol with cudaMemcpyToSymbol.
 */
static float* s_devBuffer = NULL;

/* Size in bytes of the SIZE_X x SIZE_Y x SIZE_Z float volume. */
static size_t volumeBytes()
{
    return sizeof(float) * SIZE_X * SIZE_Y * SIZE_Z;
}

/* Copies the given device address into the device symbol d_val. */
static void publishBuffer(float* devPtr)
{
    CUDA_SAFE_CALL( cudaMemcpyToSymbol(d_val, &devPtr, sizeof(devPtr)) );
}

/*
 * Allocates the device result buffer and makes it visible to the kernel
 * through d_val. Calling it again while a buffer exists is a no-op, so the
 * allocation is never leaked by repeated initialisation.
 */
extern "C"
void InitVal()
{
    if (s_devBuffer != NULL)
        return;
    CUDA_SAFE_CALL( cudaMalloc((void**) &s_devBuffer, volumeBytes()) );
    publishBuffer(s_devBuffer);
}

/*
 * Launches HelloCUDA with the given configuration and copies the whole
 * volume back to the host.
 *
 * gridSize, blockSize: launch configuration passed to the kernel unchanged.
 * h_val: host buffer with room for SIZE_X*SIZE_Y*SIZE_Z floats; receives the
 *        result.
 */
extern "C"
void kernel(dim3 gridSize, dim3 blockSize, float *h_val)
{
    // Allocate on first use so a caller that never called InitVal() still works.
    InitVal();

    HelloCUDA<<<gridSize,blockSize>>>();
    // A launch reports configuration errors only through the last-error state.
    CUDA_SAFE_CALL( cudaGetLastError() );

    // Blocking copy: it also waits for the kernel to finish.
    CUDA_SAFE_CALL( cudaMemcpy(h_val, s_devBuffer, volumeBytes(), cudaMemcpyDeviceToHost) );
}

/*
 * Releases the device result buffer and clears d_val so a later kernel
 * cannot write through a dangling address. Safe to call when nothing is
 * allocated.
 */
extern "C"
void clean()
{
    if (s_devBuffer == NULL)
        return;
    CUDA_SAFE_CALL( cudaFree(s_devBuffer) );
    s_devBuffer = NULL;
    publishBuffer(NULL);
}
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <cutil.h>
#define SIZE_X 8
#define SIZE_Y 8
#define SIZE_Z 8
extern "C" void kernel(dim3 gridSize, dim3 blockSize, float *h_val);
extern "C" void InitVal();
extern "C" void clean();
/************************************************************************/
/* Init CUDA */
/************************************************************************/
bool InitCUDA(void)
{
int count = 0;
int i = 0;
cudaGetDeviceCount(&count);
if(count == 0) {
fprintf(stderr, "There is no device.\n");
return false;
}
for(i = 0; i < count; i++) {
cudaDeviceProp prop;
if(cudaGetDeviceProperties(&prop, i) == cudaSuccess) {
if(prop.major >= 1) {
break;
}
}
}
if(i == count) {
fprintf(stderr, "There is no device supporting CUDA.\n");
return false;
}
cudaSetDevice(i);
printf("CUDA initialized.\n");
return true;
}
/*
 * Divides a by b and adds one to the truncated quotient whenever the
 * division leaves a remainder. Used to size a grid so that
 * blocks * blockSize covers all elements.
 * b must be non-zero.
 */
int iDivUp(int a, int b){
    int quotient = a / b;
    if (a % b != 0) {
        ++quotient;
    }
    return quotient;
}
/************************************************************************/
/* HelloCUDA */
/************************************************************************/
int main(int argc, char* argv[])
{
if(!InitCUDA()) {
return 0;
}
float *h_val;
h_val = (float*)malloc(SIZE_X*SIZE_Y*SIZE_Z*sizeof(float));
unsigned int timer = 0;
CUT_SAFE_CALL( cutCreateTimer( &timer));
CUT_SAFE_CALL( cutStartTimer( timer));
dim3 blockSize = dim3(8,8);
dim3 gridSize = dim3(iDivUp(SIZE_X,blockSize.x), iDivUp(SIZE_Y,blockSize.y));
kernel(gridSize, blockSize,h_val);
CUT_CHECK_ERROR("Kernel execution failed\n");
CUDA_SAFE_CALL( cudaDeviceSynchronize() );
CUT_SAFE_CALL( cutStopTimer( timer));
printf("Processing time: %f (ms)\n", cutGetTimerValue( timer));
CUT_SAFE_CALL( cutDeleteTimer( timer));
for (int k= 0;k<SIZE_X;++k)
{
for (int j= 0;j<SIZE_Y;++j)
{
for (int i= 0;i<SIZE_Z;++i)
printf("%f\n",h_val[i+j*SIZE_X+k*SIZE_Y*SIZE_X]);
printf("\n");
}
printf("\n");
}
printf("-------------->%f", h_val[4]);
//printf("%f\n",h_val);
free(h_val);
clean();
CUT_EXIT(argc, argv);
return 0;
}