603
社区成员
发帖
与我相关
我的任务
分享
int main()
{
clock_t start,finish;
double totaltime;
start=clock();
int test_num = 0;
int test_flag = 1;
float* A;
float* B;
float* C;
float* D;
float* E;
float* F;
float* G;
float* result;
A=(float*)malloc(365*sizeof(float));
B=(float*)malloc(365*sizeof(float));
C=(float*)malloc(365*sizeof(float));
D=(float*)malloc(365*sizeof(float));
E=(float*)malloc(365*sizeof(float));
F=(float*)malloc(365*sizeof(float));
G=(float*)malloc(365*sizeof(float));
result=(float*)malloc(365*sizeof(float));
for(int i=0;i<365;i++)
{
A[i]=1992;
}
for(int i=0;i<365;i++)
{
B[i]=1993;
}
for(int i=0;i<365;i++)
{
C[i]=1994;
}
for(int i=0;i<365;i++)
{
D[i]=1995;
}
for(int i=0;i<365;i++)
{
E[i]=1996;
}
for(int i=0;i<365;i++)
{
F[i]=1997;
}
for(int i=0;i<365;i++)
{
G[i]=1998;
}
cl_platform_id platform;
cl_int ciErrNum;
cl_platform_info platformName;
ciErrNum=clGetPlatformIDs(1,&platform,NULL);
if (ciErrNum != CL_SUCCESS)
{
printf("Error: Getting platforms!");
return FAILURE;
}
//get platform information
size_t infoSize;
platformName=CL_PLATFORM_NAME;
ciErrNum=clGetPlatformInfo(platform,platformName,0,NULL,&infoSize);
if (ciErrNum != CL_SUCCESS)
{
printf("Error1: Getting platforms information!");
return FAILURE;
}
char* info=(char*)malloc(sizeof(char)*infoSize);
ciErrNum=clGetPlatformInfo(platform,platformName,infoSize,info,NULL);
if (ciErrNum != CL_SUCCESS)
{
printf("Error2: Getting platforms information!");
return FAILURE;
}
printf("%s\n",info);
free(info);
//Find device.
cl_device_id *device;
device=NULL;
cl_uint numDevice;
ciErrNum=clGetDeviceIDs(platform,CL_DEVICE_TYPE_GPU,4,device,&numDevice);//第二个参数应该为CL_DEVICE_TYPE_GPU,
if (numDevice == 0) //No GPU available.
{
printf( "No GPU device available." );
printf( "Choose CPU as default device.");
ciErrNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevice);
device = (cl_device_id*)malloc(numDevice * sizeof(cl_device_id));
ciErrNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevice, device, NULL);
}
else
{
device = (cl_device_id* )malloc(numDevice*sizeof(cl_device_id));
ciErrNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevice, device, NULL);
}
/*get device information*/
cl_device_info itemName;
itemName=CL_DEVICE_NAME;
cl_uint coreNum;
//cl_uint coreNum;
infoSize=NULL;
ciErrNum=clGetDeviceInfo(*device,itemName,0,NULL,&infoSize);
info=(char*)malloc(sizeof(char)*infoSize);
ciErrNum=clGetDeviceInfo(*device,itemName,infoSize,info,NULL);
printf("%s\n",info);
free(info);
itemName=CL_DEVICE_MAX_COMPUTE_UNITS;
ciErrNum=clGetDeviceInfo(*device,itemName,sizeof(cl_uint),&coreNum,NULL);
printf("Device has max compute units:%d\n",coreNum);
/*Context properties*/
cl_context_properties cps[3]={CL_CONTEXT_PLATFORM,(cl_context_properties)platform,0};
/*Create context */
cl_context ctx=clCreateContext(cps,numDevice,device,NULL,NULL,&ciErrNum);
if(ciErrNum!=CL_SUCCESS)
{
printf("can't create contex");
getchar();
return 0;
}
/*Create command queue*/
cl_command_queue myqueue=clCreateCommandQueue(ctx,*device,0,&ciErrNum);
if(ciErrNum!=CL_SUCCESS)
{
printf("can't create command queue");
getchar();
return 0;
}
start=clock();
// start=clock();
/*Create buffer A*/
cl_mem bufferA=clCreateBuffer(ctx,CL_MEM_READ_ONLY,365*sizeof(float),NULL,&ciErrNum);
/*Copy MAtrix A to device*/
ciErrNum=clEnqueueWriteBuffer(myqueue,bufferA,CL_TRUE,0,365*sizeof(float),A,0,NULL,NULL);
/*Create buffer B*/
cl_mem bufferB=clCreateBuffer(ctx,CL_MEM_READ_ONLY,365*sizeof(float),NULL,&ciErrNum);
/*Copy MAtrix B to device*/
ciErrNum=clEnqueueWriteBuffer(myqueue,bufferB,CL_TRUE,0,365*sizeof(float),B,0,NULL,NULL);
/*Create buffer C*/
cl_mem bufferC=clCreateBuffer(ctx,CL_MEM_READ_ONLY,365*sizeof(float),NULL,&ciErrNum);
/*Copy MAtrix C to device*/
ciErrNum=clEnqueueWriteBuffer(myqueue,bufferC,CL_TRUE,0,365*sizeof(float),C,0,NULL,NULL);
/*Create buffer D*/
cl_mem bufferD=clCreateBuffer(ctx,CL_MEM_READ_ONLY,365*sizeof(float),NULL,&ciErrNum);
/*Copy MAtrix D to device*/
ciErrNum=clEnqueueWriteBuffer(myqueue,bufferD,CL_TRUE,0,365*sizeof(float),D,0,NULL,NULL);
/*Create buffer E*/
cl_mem bufferE=clCreateBuffer(ctx,CL_MEM_READ_ONLY,365*sizeof(float),NULL,&ciErrNum);
/*Copy MAtrix E to device*/
ciErrNum=clEnqueueWriteBuffer(myqueue,bufferE,CL_TRUE,0,365*sizeof(float),E,0,NULL,NULL);
/*Create buffer F*/
cl_mem bufferF=clCreateBuffer(ctx,CL_MEM_READ_ONLY,365*sizeof(float),NULL,&ciErrNum);
/*Copy MAtrix F to device*/
ciErrNum=clEnqueueWriteBuffer(myqueue,bufferF,CL_TRUE,0,365*sizeof(float),F,0,NULL,NULL);
/*Create buffer G*/
cl_mem bufferG=clCreateBuffer(ctx,CL_MEM_READ_ONLY,365*sizeof(float),NULL,&ciErrNum);
/*Copy MAtrix G to device*/
ciErrNum=clEnqueueWriteBuffer(myqueue,bufferG,CL_TRUE,0,365*sizeof(float),G,0,NULL,NULL);
/*Create space for result*/
cl_mem bufferR=clCreateBuffer(ctx,CL_MEM_WRITE_ONLY,365*sizeof(float),NULL,&ciErrNum);
/*Create program object*/
//将kernel转换成String
const char *sourceStr = testKernel;
size_t sourceSize[] = {strlen(sourceStr)};
cl_program program;
program= clCreateProgramWithSource(ctx, numDevice, (const char**)&sourceStr, sourceSize, &ciErrNum);
if(ciErrNum!=CL_SUCCESS)
{
printf("can't create programe");
getchar();
system("pause");
return 0;
}
/*Build program*/
ciErrNum=clBuildProgram(program, numDevice,device,NULL,NULL,NULL);
/*输出build失败的原因,主要是kernel的错误*/
if (ciErrNum!= CL_SUCCESS)
{
size_t len;
char buffer[2048];
printf("Error: Failed to build program executable!\n");
clGetProgramBuildInfo(program, *device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
printf("%s\n", buffer);
exit(1);
}
/*above are very important code */
cl_kernel myKernel=clCreateKernel(program,"testKernel",NULL);
/*Set Kernel arguments*/
clSetKernelArg(myKernel,0,sizeof(cl_mem),&bufferA);
clSetKernelArg(myKernel,1,sizeof(cl_mem),&bufferB);
clSetKernelArg(myKernel,2,sizeof(cl_mem),&bufferC);
clSetKernelArg(myKernel,3,sizeof(cl_mem),&bufferD);
clSetKernelArg(myKernel,4,sizeof(cl_mem),&bufferE);
clSetKernelArg(myKernel,5,sizeof(cl_mem),&bufferF);
clSetKernelArg(myKernel,6,sizeof(cl_mem),&bufferG);
clSetKernelArg(myKernel,7,sizeof(cl_mem),&bufferR);
size_t localws[2]={(size_t)6,(size_t)6};
size_t globalws[2]={(size_t)200,(size_t)200};
ciErrNum=clEnqueueNDRangeKernel(myqueue,myKernel,2,NULL,globalws,NULL,0,NULL,NULL);
finish=clock();
totaltime=(double)(finish-start)/CLOCKS_PER_SEC;;
printf("\nTotal time is %f",totaltime);
ciErrNum=clEnqueueReadBuffer(myqueue,bufferR,CL_TRUE,0,365*sizeof(float),result,0,NULL,NULL);
for(int i=0;i<365;i++)
{
printf("%f\n",result[i]);
}
clReleaseProgram(program);
clReleaseKernel(myKernel);
clReleaseMemObject(bufferA);
clReleaseMemObject(bufferB);
clReleaseMemObject(bufferC);
clReleaseMemObject(bufferD);
clReleaseMemObject(bufferE);
clReleaseMemObject(bufferF);
clReleaseMemObject(bufferG);
clReleaseMemObject(bufferR);
clReleaseCommandQueue(myqueue);
clReleaseContext(ctx);
free(A);
free(B);
free(C);
free(D);
free(E);
free(F);
free(G);
free(result);
system("pause");
/*
 * OpenCL C source for the "testKernel" kernel, compiled at run time.
 *
 * Result[i] = A[i] + B[i] + C[i] + D[i] + E[i] + F[i] + G[i] for the 365
 * elements of each buffer.
 *
 * Each work-item starts at its global id in dimension 0 and advances by the
 * global size of dimension 0. Every element is therefore written for any
 * launch size, the loop always terminates, and no index reaches 365 (one past
 * the end of the buffers).
 */
const char *testKernel = "\n"
"__kernel void testKernel( \n"
" __global float* A, \n"
" __global float* B, \n"
" __global float* C, \n"
" __global float* D, \n"
" __global float* E, \n"
" __global float* F, \n"
" __global float* G, \n"
" __global float* Result) \n"
"{ \n"
" const int count = 365; \n"
" const int stride = (int)get_global_size(0); \n"
" for (int i = (int)get_global_id(0); i < count; i += stride) \n"
" { \n"
" Result[i]=A[i]+B[i]+C[i]+D[i]+E[i]+F[i]+G[i]; \n"
" } \n"
"} \n"
"\n";