CL_OUT_OF_RESOURCES 很纳闷啊
cl代码如下:
__kernel void KMeans(
__global int* nOutCount,
__global float* vecInPointModel,
__global float* vecInCenterModel
)
{
int i = get_global_id(0);
int flag=0;
float minDistance = 9999999.9;
float dis=0.0;
for (int k = 0; k < 2000; k++)
{
for(int j=0;j<50;j++)
{
float l = fabs(vecInPointModel[i*50+j]- vecInCenterModel[k*50+j]);
dis = dis +l;//dis += l;
}
if (dis <=minDistance )
{
flag = k;
minDistance = dis;
}
dis=0.0;
}
nOutCount[i] = flag;
}
cpu部分代码:
#define N 1000*128 //宏定义
#define CN 2000
#define DIM 50
float* vecInPointModel = new float[N*DIM];//申请内存
float* vecInCenterModel = new float[CN*DIM];
int* nOutCount = new int[N];
//clCreateBuffer
clmemOut_OutU = clCreateBuffer(m_context, CL_MEM_READ_WRITE, sizeof(int)*N, NULL, NULL);
clmemArrayIn_Point = clCreateBuffer(m_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float)*N*DIM, vecInPointModel, NULL);
clmemArrayIn_Center = clCreateBuffer(m_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float)*CN*DIM, vecInCenterModel, NULL);
int a = 0;
err = clSetKernelArg(m_kernel_Add,a++,sizeof(cl_mem),&clmemOut_OutU);//设置参数
err |= clSetKernelArg(m_kernel_Add,a++,sizeof(cl_mem),&clmemArrayIn_Point);
err |= clSetKernelArg(m_kernel_Add,a++,sizeof(cl_mem),&clmemArrayIn_Center);
size_t localws[1] = {32};
size_t globalws[1] = {N};
int* nOutSumCount = new int[N];
//运行
err = clEnqueueNDRangeKernel(m_commandQueue,m_kernel_Add,1,NULL,globalws,localws,0,NULL,NULL);
clFinish(m_commandQueue);
//读取
err = clEnqueueReadBuffer(m_commandQueue, clmemOut_OutU,CL_TRUE,0,sizeof(int)*N,nOutSumCount,0,0,0);
Interl运行正常 NVIDIA运行clEnqueueReadBuffer报错CL_OUT_OF_RESOURCES
不用担心内存申请过大,Interl GPU才384M NVIDIA是512M
也不用担心globalsize和localsize
也不用担心数组越界
另外通过测试:(只进行参考 避免误导)
for(int j=0;j<50;j++)
{
float l = fabs(vecInPointModel[i*50+j]- vecInCenterModel[k*50+j]);
dis = dis +l;//dis += l;
}
if (dis <=minDistance )
{
flag = k;
minDistance = dis;
}
完全屏蔽if或者for NVIDIA都能正常运行,否则CL_OUT_OF_RESOURCES
屏蔽for里面的dis = dis +l; 也能正常运行
或者把dis = dis +l;改成dis += 1.0; 也能正常运行。为什么跟vecInPointModel[i*50+j]这个联系上,就不行?
网上查找没有能够找出错误,还请大神帮个忙,帮小弟看看