2,851
社区成员




目前没有官方发布的 GPU 压力测试程序,但 QCS8550 的 GPU 支持 OpenCL,所以我们可以使用下面的测试程序把 GPU 的负载拉到 100%。这个程序需要手动编译:
#include <CL/cl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/*
 * OpenCL C source of the stress kernel.
 *
 * Each work-item reads a[id] and b[id], accumulates their product 1000 times
 * into a float, and stores the sum in c[id].
 *
 * Every source line ends in '\n' so that build-log diagnostics refer to a
 * meaningful line number instead of a single concatenated line, and the
 * accumulator uses a float literal (0.0f) so that no double-precision
 * constant appears in a float-only kernel.
 */
const char *kernelSource =
"__kernel void stress_test(__global float *a, __global float *b, __global float *c) {\n"
"    int id = get_global_id(0);\n"
"    float tmp = 0.0f;\n"
"    for (int i = 0; i < 1000; ++i) {\n" /* loop count controls the arithmetic load per work-item */
"        tmp += a[id] * b[id];\n"
"    }\n"
"    c[id] = tmp;\n"
"}\n";
/* Number of kernel launches queued between two host/device synchronizations. */
enum { LAUNCHES_PER_SYNC = 16 };

/*
 * Reports a failed OpenCL call on stderr.
 * Returns 0 when `status` is CL_SUCCESS, nonzero otherwise.
 */
static int cl_failed(cl_int status, const char *what)
{
    if (status == CL_SUCCESS) {
        return 0;
    }
    fprintf(stderr, "%s failed with OpenCL error %d\n", what, (int)status);
    return 1;
}

/* Prints the build log of `program` for `device` to stderr (best effort). */
static void print_build_log(cl_program program, cl_device_id device)
{
    size_t log_size = 0;
    char *log;

    if (clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                              0, NULL, &log_size) != CL_SUCCESS || log_size == 0) {
        return;
    }
    log = malloc(log_size + 1);
    if (log == NULL) {
        return;
    }
    if (clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                              log_size, log, NULL) == CL_SUCCESS) {
        log[log_size] = '\0';
        fprintf(stderr, "OpenCL build log:\n%s\n", log);
    }
    free(log);
}

/*
 * GPU stress test entry point.
 *
 * Fills two host arrays with random floats, uploads them to the first GPU
 * device of the first OpenCL platform, builds the `stress_test` kernel from
 * `kernelSource`, and then launches that kernel in an endless loop.
 *
 * The loop only ends when an OpenCL call fails; in that case the error is
 * printed, all acquired resources are released, and EXIT_FAILURE is returned.
 */
int main(void)
{
    cl_platform_id platform = NULL;
    cl_device_id device = NULL;
    cl_context context = NULL;
    cl_command_queue queue = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_mem a_mem = NULL, b_mem = NULL, c_mem = NULL;
    float *a = NULL, *b = NULL;
    size_t globalSize = 1024 * 1024 * 2; /* total work-items; raise for more load */
    size_t localSize = 64;               /* work-group size; globalSize is a multiple of it */
    size_t bytes = globalSize * sizeof(float);
    cl_int err;
    int rc = EXIT_FAILURE;

    /* Host-side input data. The output buffer is never read back, so no host copy is needed. */
    a = malloc(bytes);
    b = malloc(bytes);
    if (a == NULL || b == NULL) {
        fprintf(stderr, "out of memory allocating %zu bytes of host data\n", bytes);
        goto cleanup;
    }

    /* Fill the arrays with random data in [0, 1]. */
    srand((unsigned)time(NULL));
    for (size_t i = 0; i < globalSize; i++) {
        a[i] = rand() / (float)RAND_MAX;
        b[i] = rand() / (float)RAND_MAX;
    }

    /* Get platform and device information. */
    err = clGetPlatformIDs(1, &platform, NULL);
    if (cl_failed(err, "clGetPlatformIDs")) {
        goto cleanup;
    }
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    if (cl_failed(err, "clGetDeviceIDs")) {
        goto cleanup;
    }

    /* Create context and command queue. */
    context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
    if (cl_failed(err, "clCreateContext")) {
        goto cleanup;
    }
    queue = clCreateCommandQueue(context, device, 0, &err);
    if (cl_failed(err, "clCreateCommandQueue")) {
        goto cleanup;
    }

    /* Create and build the program from source. */
    program = clCreateProgramWithSource(context, 1, &kernelSource, NULL, &err);
    if (cl_failed(err, "clCreateProgramWithSource")) {
        goto cleanup;
    }
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (cl_failed(err, "clBuildProgram")) {
        print_build_log(program, device);
        goto cleanup;
    }

    /* Create kernel. */
    kernel = clCreateKernel(program, "stress_test", &err);
    if (cl_failed(err, "clCreateKernel")) {
        goto cleanup;
    }

    /* Create device memory buffers. */
    a_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, &err);
    if (cl_failed(err, "clCreateBuffer(a)")) {
        goto cleanup;
    }
    b_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, &err);
    if (cl_failed(err, "clCreateBuffer(b)")) {
        goto cleanup;
    }
    c_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bytes, NULL, &err);
    if (cl_failed(err, "clCreateBuffer(c)")) {
        goto cleanup;
    }

    /* Copy input data to the device (blocking writes). */
    err = clEnqueueWriteBuffer(queue, a_mem, CL_TRUE, 0, bytes, a, 0, NULL, NULL);
    if (cl_failed(err, "clEnqueueWriteBuffer(a)")) {
        goto cleanup;
    }
    err = clEnqueueWriteBuffer(queue, b_mem, CL_TRUE, 0, bytes, b, 0, NULL, NULL);
    if (cl_failed(err, "clEnqueueWriteBuffer(b)")) {
        goto cleanup;
    }

    /* Set kernel arguments. */
    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &a_mem);
    if (cl_failed(err, "clSetKernelArg(0)")) {
        goto cleanup;
    }
    err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &b_mem);
    if (cl_failed(err, "clSetKernelArg(1)")) {
        goto cleanup;
    }
    err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &c_mem);
    if (cl_failed(err, "clSetKernelArg(2)")) {
        goto cleanup;
    }

    /*
     * Stress loop: queue a batch of launches, then wait for the batch.
     * Synchronizing once per batch (rather than never) bounds the number of
     * commands pending in the queue, and guarantees they are actually
     * submitted to the device, while keeping synchronization infrequent.
     */
    for (;;) {
        for (int launch = 0; launch < LAUNCHES_PER_SYNC; launch++) {
            err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
                                         &globalSize, &localSize, 0, NULL, NULL);
            if (cl_failed(err, "clEnqueueNDRangeKernel")) {
                goto cleanup;
            }
        }
        err = clFinish(queue);
        if (cl_failed(err, "clFinish")) {
            goto cleanup;
        }
    }

cleanup:
    /* Only reached on failure; releases whatever was acquired so far. */
    if (c_mem != NULL) {
        clReleaseMemObject(c_mem);
    }
    if (b_mem != NULL) {
        clReleaseMemObject(b_mem);
    }
    if (a_mem != NULL) {
        clReleaseMemObject(a_mem);
    }
    if (kernel != NULL) {
        clReleaseKernel(kernel);
    }
    if (program != NULL) {
        clReleaseProgram(program);
    }
    if (queue != NULL) {
        clReleaseCommandQueue(queue);
    }
    if (context != NULL) {
        clReleaseContext(context);
    }
    free(b);
    free(a);
    return rc;
}