OUT_OF_RESOURCE_RANGE, OpenCL,并行,无法读取输出内容,初学,求大神指点!!

Groves_Lin 2016-12-08 04:01:53
kernel code:

/*
 * Element-wise addition of two float arrays.
 *
 * Each work-item handles the single element selected by its global ID in
 * dimension 0: result[id] = a[id] + b[id].
 *
 * NOTE(review): there is no bounds check, so the host is assumed to launch
 * no more work-items than the buffers have elements -- confirm at call site.
 */
kernel void hello_kernel(__global const float* a, __global const float* b, __global float* result)
{
    const int idx = get_global_id(0);
    const float lhs = a[idx];
    const float rhs = b[idx];

    result[idx] = lhs + rhs;
}

source code:

#include <stdio.h>
#include <stdlib.h>
#include <fstream>
#include <iostream>

#include "CL/cl.h"
#include <CL/cl.hpp>

using namespace std;

const int ARRAY_SIZE = 1000;

int main(int argc, char**argv)
{
//-------------------1. 获得并选择可用平台-----------------------------
cl_int iErrorNum;
cl_uint NumPlatform;
cl_platform_id* pPlatformId;
iErrorNum = clGetPlatformIDs(0, NULL, &NumPlatform);
if (CL_SUCCESS != iErrorNum)
{
cout << "can not found any OpenCL platforms." << endl;
exit(EXIT_FAILURE);
}
pPlatformId = (cl_platform_id*)malloc(NumPlatform * sizeof(cl_platform_id));
iErrorNum = clGetPlatformIDs(NumPlatform, pPlatformId, NULL);
cl_uint i;
//----------------------查询平台信息------------------------------------
for (i = 0; i < NumPlatform; i++)
{


char platformName[20];
iErrorNum = clGetPlatformInfo(pPlatformId[i], CL_PLATFORM_NAME, sizeof(platformName), platformName, NULL);
if (CL_SUCCESS != iErrorNum)
{
cout << "can not get platform Name." << endl;
exit(EXIT_FAILURE);
}
cout << "PlatformName:"<<platformName << endl;

char platformVersion[50];
iErrorNum = clGetPlatformInfo(pPlatformId[i], CL_PLATFORM_VERSION, sizeof(platformVersion), platformVersion, NULL);
if (CL_SUCCESS != iErrorNum)
{
cout << "can not get platform version." << endl;
exit(EXIT_FAILURE);
}
cout << "PlatformVersion:"<<platformVersion << endl;

//----------------------2. 查询GPU设备-----------------------------------
cl_uint NumDevices;
cl_device_id* pDeviceId;
iErrorNum = clGetDeviceIDs(pPlatformId[i], CL_DEVICE_TYPE_GPU, 0, NULL, &NumDevices);
if (CL_SUCCESS != iErrorNum || iErrorNum == CL_DEVICE_NOT_FOUND)
{
cout << "can not get any devices." << endl;
exit(EXIT_FAILURE);
}
pDeviceId = (cl_device_id*)malloc(NumDevices * sizeof(cl_device_id));
iErrorNum = clGetDeviceIDs(pPlatformId[i], CL_DEVICE_TYPE_GPU, NumDevices, pDeviceId, NULL);
cl_uint j;
//---------------------查询设备的详细信息---------------------------
for (j = 0; j < NumDevices; j++)
{
char deviceName[20];
iErrorNum = clGetDeviceInfo(pDeviceId[j], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
if (CL_SUCCESS != iErrorNum)
{
cout << "can not get device Name." << endl;
exit(EXIT_FAILURE);
}
cout << "DeviceName:" << deviceName << endl;

cl_uint device_max_compute_unit = 0;
iErrorNum = clGetDeviceInfo(pDeviceId[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &device_max_compute_unit, NULL);
if (CL_SUCCESS != iErrorNum)
{
cout << "can not get max compute uint." << endl;
exit(EXIT_FAILURE);
}
cout << "device max compute uint:" << device_max_compute_unit << endl;

size_t device_max_work_group_size = 0;
iErrorNum = clGetDeviceInfo(pDeviceId[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(device_max_work_group_size), &device_max_work_group_size, NULL);
if (CL_SUCCESS != iErrorNum)
{
cout << "can not get max work group size." << endl;
exit(EXIT_FAILURE);
}
cout << "device max work group size:" << device_max_work_group_size << endl;

cl_uint max_work_item_dimensions = 0;
iErrorNum = clGetDeviceInfo(pDeviceId[j], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &max_work_item_dimensions, NULL);
if (CL_SUCCESS != iErrorNum)
{
cout << "can not get max work item dimensions." << endl;
exit(EXIT_FAILURE);
}
cout << "device max work item dimensions:" << max_work_item_dimensions << endl;
}

//// -------------------3.创建设备环境---------------------------------
cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)pPlatformId[i], 0 };
cl_context context = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &iErrorNum);
if (NULL == context)
{
cout << "create context fail." << endl;
exit(EXIT_FAILURE);
}

// -------------------4.创建命令队列--------------------------------------
cl_command_queue command_queue = clCreateCommandQueue(context, pDeviceId[0], NULL, &iErrorNum);
if (NULL == command_queue)
{
cout << "create command_queue fail." << endl;
exit(EXIT_FAILURE);
}

FILE* handle_kernelFile = fopen("MyOpenCL.cl", "rb");
if (NULL == handle_kernelFile)
{
cout << "open kernel file fail." << endl;
exit(EXIT_FAILURE);
}
size_t CodeLength = 0;
fseek(handle_kernelFile, 0, ios_base::end);
CodeLength = ftell(handle_kernelFile);
fseek(handle_kernelFile, 0, ios_base::beg);
//char* strSource = new char(CodeLength + 1);
char* strSource = (char*)malloc(CodeLength + 1);
strSource[CodeLength] = '\0';
fread(strSource, sizeof(char), CodeLength, handle_kernelFile);
cout << "kernel Code:\n" << strSource << endl;
fclose(handle_kernelFile);

//ifstream srcfile("MyOpenCL.cl");
//string srcPro(istreambuf_iterator<char>(srcfile), (istreambuf_iterator<char>()));
//const char* str = srcPro.c_str();
//size_t length = srcPro.length();
//cout << "kernel code:\n" << str << endl;
////cl_program program = clCreateProgramWithSource(context, NumDevice, (const char**)&str, &length, &iErrorNum);
//cl_program program = clCreateProgramWithSource(context, NumDevices, (const char**)&str, &length, &iErrorNum);


//ifstream kernelFiele("MyOpenCL.cl", ios::in);
//if (!kernelFiele.is_open())
//{
// cout << "" << endl;
// return NULL;
//}
//ostringstream oss;
//oss << kernelFiele.rdbuf();
//string strStdstr = oss.str();
//const char* srcStr = strStdstr.c_str();

// ----------------------5. 创建程序对象------------------------------
cl_program program = clCreateProgramWithSource(context, NumDevices, (const char**)&strSource, &CodeLength, &iErrorNum);
if (NULL == program)
{
cout << "create program fail." << endl;
exit(EXIT_FAILURE);
}

// -----------------------------6. 编译程序--------------------------------

iErrorNum = clBuildProgram(program, NumDevices, pDeviceId, NULL, NULL, NULL);
if (CL_SUCCESS != iErrorNum)
{
cout << "build program fail." << endl;
exit(EXIT_FAILURE);
}

//const unsigned int inputSignalWidth = 8;
//const unsigned int inputSignalHeight = 8;

//cl_uint inputsignal[inputSignalWidth][inputSignalHeight] =
//{
// { 3, 1, 1, 4, 8, 2, 1, 3 },
// { 4, 2, 1, 1, 2, 1, 2, 3 },
// { 4, 4, 4, 4, 3, 2, 2, 2 },
// { 9, 8, 3, 8, 9, 0, 0, 0 },
// { 9, 3, 3, 9, 0, 0, 0, 0 },
// { 0, 9, 0, 8, 0, 0, 0, 0 },
// { 3, 0, 8, 8, 9, 4, 4, 4 },
// { 5, 9, 8, 1, 8, 1, 1, 1 }
//};

//const unsigned int maskWidth = 3;
//const unsigned int maskHeight = 3;

//cl_uint mask[maskWidth][maskHeight] =
//{ { 1, 1, 1 }, { 1, 0, 1 }, { 1, 1, 1 }};

//const unsigned int outputSignalWidth = 6;
//const unsigned int outputSignalHeight = 6;
//cl_uint outsignal[outputSignalWidth][outputSignalHeight];

//cl_mem inputbuffer, maskbuffer, outputbuffer;
//inputbuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_HOST_PTR, sizeof(cl_uint)*inputSignalWidth*inputSignalHeight, static_cast<void*>(inputsignal), NULL);
//maskbuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_HOST_PTR, sizeof(cl_uint)*maskWidth*maskHeight, static_cast<void*>(mask), NULL);
//outputbuffer = clCreateBuffer(context,CL_MEM_WRITE_ONLY, sizeof(cl_uint)*outputSignalWidth*outputSignalHeight, NULL, NULL);


cl_mem inputbuffera, inputbufferb, outputbuffer;

//cl_mem memObject[3] = { 0, 0, 0 };

float result[ARRAY_SIZE];
float a[ARRAY_SIZE];
float b[ARRAY_SIZE];
for (int i = 0; i < ARRAY_SIZE; i++)
{
a[i] = i;
b[i] = i * 2;
}
inputbuffera = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_HOST_PTR, sizeof(float)* ARRAY_SIZE, a, NULL);
inputbufferb = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_HOST_PTR, sizeof(float)* ARRAY_SIZE, b, NULL);
outputbuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float)*ARRAY_SIZE, NULL, NULL);

cl_kernel kernel = clCreateKernel(program, "hello_kernel", NULL);
if (NULL == kernel)
{
cout << "create kernel fail." << endl;
exit(EXIT_FAILURE);
}

iErrorNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputbuffera);
iErrorNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &inputbufferb);
iErrorNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &outputbuffer);
//iErrorNum = clSetKernelArg(kernel, 0, sizeof(inputbuffer), &inputbuffer);
//iErrorNum |= clSetKernelArg(kernel, 1, sizeof(maskbuffer), &maskbuffer);
//iErrorNum |= clSetKernelArg(kernel, 2, sizeof(outputbuffer), &outputbuffer);
//iErrorNum |= clSetKernelArg(kernel, 3, sizeof(cl_uint),&inputSignalWidth);
//iErrorNum |= clSetKernelArg(kernel, 4, sizeof(cl_uint),&maskWidth);

//const size_t global_work_size[1] = {outputSignalWidth*outputSignalHeight};
const size_t global_work_size[1] = { ARRAY_SIZE };
const size_t local_work_size[1] = { 1 };
iErrorNum = clEnqueueNDRangeKernel(command_queue, kernel, NumDevices, NULL, global_work_size, local_work_size, 0, NULL, NULL);
if (CL_SUCCESS != iErrorNum)
{
cout << "execute kernel fail." << endl;
exit(EXIT_FAILURE);
}
//OUT_OF_RESOURCE_RANGE
//iErrorNum = clEnqueueReadBuffer(command_queue, outputbuffer, CL_TRUE, 0, sizeof(cl_uint)*outputSignalWidth*outputSignalHeight, outsignal, 0, NULL, NULL);
iErrorNum = clEnqueueReadBuffer(command_queue, outputbuffer, CL_TRUE, 0, sizeof(float)*ARRAY_SIZE, result, 0, NULL, NULL);
if (CL_SUCCESS != iErrorNum)
{
cout << "Readbuffer fail." << endl;
exit(EXIT_FAILURE);
}
for (int i = 0; i < ARRAY_SIZE; i++)
{
cout<<result[i]<<endl;
}
clReleaseContext(context);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseMemObject(inputbuffera);
clReleaseMemObject(inputbufferb);
clReleaseMemObject(outputbuffer);
clReleaseCommandQueue(command_queue);
}
return 0;
}
...全文
135 4 打赏 收藏 转发到动态 举报
写回复
用AI写文章
4 条回复
切换为时间正序
请发表友善的回复…
发表回复
Groves_Lin 2016-12-08
  • 打赏
  • 举报
回复
如题,所贴代码中创建缓存与写入数据是在创建时同时进行的,读 buffer 时总是报 OUT_OF_RESOURCE_RANGE;后来更改代码,改为分步进行(先创建缓存,再用 clEnqueueWriteBuffer 写入数据),程序正确执行,输出计算结果:
cl_mem inputbuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_uint)*inputSignalWidth*inputSignalHeight, NULL, &iErrorNum);
cl_mem maskbuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_uint)*maskWidth*maskHeight, NULL, &iErrorNum);
cl_mem outputbuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_uint)*outputSignalWidth*outputSignalHeight, NULL, NULL);
iErrorNum = clEnqueueWriteBuffer(command_queue, inputbuffer, CL_TRUE, 0, sizeof(cl_uint)*inputSignalWidth*inputSignalHeight, inputsignal, 0, NULL, NULL);
iErrorNum = clEnqueueWriteBuffer(command_queue, maskbuffer, CL_TRUE, 0, sizeof(cl_uint)*maskWidth*maskHeight, mask, 0, NULL, NULL);
赵4老师 2016-12-08
  • 打赏
  • 举报
回复
顺便说说解决办法,提携一下后辈。
赵4老师 2016-12-08
  • 打赏
  • 举报
回复
帮结帖+蹭分
Groves_Lin 2016-12-08
  • 打赏
  • 举报
回复
已自己解决问题了,

24,854

社区成员

发帖
与我相关
我的任务
社区描述
C/C++ 工具平台和程序库
社区管理员
  • 工具平台和程序库社区
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧