Reputation: 343
This is a follow-up to "OpenCL Host ran out of Memory in trivial Kernel". After applying the corrections from that question and fixing some further errors, my program now hangs at clEnqueueReadBuffer
for an unknown reason.
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#define CL_TARGET_OPENCL_VERSION 210
#include "CL/cl.h"
// OpenCL C source of the SAXPY kernel, handed to clCreateProgramWithSource.
// Each work-item takes its global id i and adds a * x[i] onto y[i].
// NOTE(review): "+=" reads y[i] before writing it, so the buffer bound to y
// needs defined contents and must be readable by the kernel.
char* program_src = "__kernel void SAXPY (__global float* x, __global float* y, float a)\n"
"{\n"
"const int i = get_global_id (0);\n"
"y [i] += a * x [i];\n"
"}\n";
// Test driver: picks the first device of the first OpenCL platform, builds the
// SAXPY kernel from program_src, uploads ten input floats, enqueues the kernel
// over ten work-items and then performs a blocking read of the output buffer.
// Every failing OpenCL call prints its error code (where one is available) and
// ends the program with EXIT_FAILURE. No OpenCL object is released in the part
// of the program shown here.
int main() {
cl_platform_id platform_ids[16];
cl_uint platform_count;
// NOTE(review): &platform_ids is a pointer to the whole array rather than to
// its first element; passing platform_ids alone gives the expected type.
if (clGetPlatformIDs(16, &platform_ids, &platform_count) != CL_SUCCESS) {
return EXIT_FAILURE;
}
printf("%i cl platform(s) found\n", platform_count);
if (platform_count == 0) {
return EXIT_FAILURE;
}
printf("choosing platform 0...\n");
cl_device_id device_ids[16];
// NOTE(review): clGetDeviceIDs reports the count through a cl_uint *, but
// device_count is declared cl_int; &device_ids has the same pointer-to-array
// issue as &platform_ids above.
cl_int device_count;
if (clGetDeviceIDs(platform_ids[0], CL_DEVICE_TYPE_ALL, 16, &device_ids, &device_count) != CL_SUCCESS) {
return EXIT_FAILURE;
}
printf("%i cl device(s) found on platform 0\n", device_count);
if (device_count == 0) {
return EXIT_FAILURE;
}
// Only the first reported device is used from here on.
cl_device_id device = device_ids[0];
printf("** running test **\n");
cl_int cl_fehler;
cl_context ctx = clCreateContext(NULL, 1, &device, NULL, NULL, &cl_fehler);
if (ctx == NULL) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clCommandQueue\n");
cl_fehler = CL_SUCCESS;
cl_command_queue queue = clCreateCommandQueue(ctx, device, 0, &cl_fehler);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
// TODO: replace the hard-coded 1 with the number of device IDs
printf("Am clCreateProgram\n");
cl_fehler = CL_SUCCESS;
cl_program program = clCreateProgramWithSource(ctx, 1, &program_src, NULL, &cl_fehler);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clBuildProgram\n");
cl_fehler = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clCreateKernel\n");
cl_fehler = CL_SUCCESS;
cl_kernel kernel = clCreateKernel(program, "SAXPY", &cl_fehler);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
// Device buffer for the kernel's x argument: ten floats, read-only for kernels.
printf("Am clCreateBuffer\n");
cl_fehler = CL_SUCCESS;
cl_mem eingabe_buffer = clCreateBuffer(ctx, CL_MEM_READ_ONLY, sizeof(cl_float) * 10, NULL, &cl_fehler);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
// Device buffer for the kernel's y argument: ten floats.
// NOTE(review): created write-only and never filled by the host, yet the
// kernel in program_src updates y with "+=", which reads it first.
printf("Am clCreateBuffer\n");
cl_fehler = CL_SUCCESS;
cl_mem ausgabe_buffer = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, sizeof(cl_float) * 10, NULL, &cl_fehler);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
cl_float eingabe_data[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
// Blocking upload (CL_TRUE) of the ten input values.
printf("Am clEnqueueWriteBuffer\n");
cl_fehler = clEnqueueWriteBuffer(queue, eingabe_buffer, CL_TRUE, 0, sizeof(cl_float) * 10, &eingabe_data, 0, NULL, NULL);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
// Kernel arguments: 0 = x (input buffer), 1 = y (output buffer), 2 = a.
printf("Am clSetKernelArg\n");
cl_fehler = clSetKernelArg(kernel, 0, sizeof(cl_mem), &eingabe_buffer);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clSetKernelArg\n");
cl_fehler = clSetKernelArg(kernel, 1, sizeof(cl_mem), &ausgabe_buffer);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clSetKernelArg\n");
cl_float f = 2.0;
cl_fehler = clSetKernelArg(kernel, 2, sizeof(cl_float), &f);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clEnqueueNDRangeKernel\n");
// The launch is one-dimensional (work_dim is 1), so only the first entry, 10,
// describes the global size.
const size_t globalWorkSize[3] = { 10, 0, 0 };
cl_fehler = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalWorkSize, NULL, 0, NULL, NULL);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clEnqueueReadBuffer\n");
// NOTE(review): the array holds 3 floats, but the blocking read below copies
// sizeof(cl_float) * 10 bytes into it, which overruns the array.
cl_float ausgabe_data[3];
cl_fehler = clEnqueueReadBuffer(queue, ausgabe_buffer, CL_TRUE, 0, sizeof(cl_float) * 10, &ausgabe_data, 0, NULL, NULL);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
// The rest of the program is omitted because the problem occurs at the read above.
}
After applying the suggestions from @pmdj the problem persists, and I can confirm that the clEnqueueReadBuffer line of the logging is being printed.
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#define CL_TARGET_OPENCL_VERSION 210
#include "CL/cl.h"
/*
 * OpenCL C source of the SAXPY kernel: the work-item with global id i stores
 * a * x[i] into y[i].
 *
 * Declared as pointer-to-const because it points at a string literal, and so
 * that &program_src has the const char ** type that clCreateProgramWithSource
 * expects for its strings argument.
 */
const char *program_src = "__kernel void SAXPY (__global float* x, __global float* y, float a)\n"
    "{\n"
    "const int i = get_global_id (0);\n"
    "y [i] = a * x [i];\n"
    "}\n";
int main() {
cl_platform_id platform_ids[16];
cl_uint platform_count;
if (clGetPlatformIDs(16, &platform_ids, &platform_count) != CL_SUCCESS) {
return EXIT_FAILURE;
}
printf("%i cl platform(s) found\n", platform_count);
if (platform_count == 0) {
return EXIT_FAILURE;
}
printf("choosing platform 0...\n");
cl_device_id device_ids[16];
cl_int device_count;
if (clGetDeviceIDs(platform_ids[0], CL_DEVICE_TYPE_ALL, 16, &device_ids, &device_count) != CL_SUCCESS) {
return EXIT_FAILURE;
}
printf("%i cl device(s) found on platform 0\n", device_count);
if (device_count == 0) {
return EXIT_FAILURE;
}
cl_device_id device = device_ids[0];
printf("** running test **\n");
cl_int cl_fehler;
cl_context ctx = clCreateContext(NULL, 1, &device, NULL, NULL, &cl_fehler);
if (ctx == NULL) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clCommandQueue\n");
cl_fehler = CL_SUCCESS;
cl_command_queue queue = clCreateCommandQueue(ctx, device, 0, &cl_fehler);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
// Replace 1 mit Zahlen von Gärate IDs
printf("Am clCreateProgram\n");
cl_fehler = CL_SUCCESS;
cl_program program = clCreateProgramWithSource(ctx, 1, &program_src, NULL, &cl_fehler);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clBuildProgram\n");
cl_fehler = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clCreateKernel\n");
cl_fehler = CL_SUCCESS;
cl_kernel kernel = clCreateKernel(program, "SAXPY", &cl_fehler);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clCreateBuffer\n");
cl_fehler = CL_SUCCESS;
cl_mem eingabe_buffer = clCreateBuffer(ctx, CL_MEM_READ_ONLY, sizeof(cl_float) * 10, NULL, &cl_fehler);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clCreateBuffer\n");
cl_fehler = CL_SUCCESS;
cl_mem ausgabe_buffer = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, sizeof(cl_float) * 10, NULL, &cl_fehler);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
cl_float eingabe_data[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
printf("Am clEnqueueWriteBuffer\n");
cl_fehler = clEnqueueWriteBuffer(queue, eingabe_buffer, CL_TRUE, 0, sizeof(cl_float) * 10, &eingabe_data, 0, NULL, NULL);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clSetKernelArg\n");
cl_fehler = clSetKernelArg(kernel, 0, sizeof(cl_mem), &eingabe_buffer);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clSetKernelArg\n");
cl_fehler = clSetKernelArg(kernel, 1, sizeof(cl_mem), &ausgabe_buffer);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clSetKernelArg\n");
cl_float f = 2.0;
cl_fehler = clSetKernelArg(kernel, 2, sizeof(cl_float), &f);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clEnqueueNDRangeKernel\n");
const size_t globalWorkSize[3] = { 10, 0, 0 };
cl_fehler = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalWorkSize, NULL, 0, NULL, NULL);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
clFinish(queue);
printf("Am clEnqueueReadBuffer\n");
cl_float ausgabe_data[10] = {1, 1, 1, 1, 1, 1, 1 ,1 ,1 ,1 };
cl_fehler = clEnqueueReadBuffer(queue, ausgabe_buffer, CL_TRUE, 0, sizeof(cl_float) * 10, &ausgabe_data[0], 0, NULL, NULL);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
// No point in share the rest of the code because the problem is being happening here
}
The host is openSUSE Leap 15.5 x86_64 on an AMD ATI 04:00.0 Lucienne and AMD Ryzen 7 5700U with Radeon Graphics (16) @ 1.800GHz with AMD rocm drivers installed systemwide.
I am compiling with cc src/main.c -lOpenCL -I/opt/rocm-6.0.0/include/ -L/opt/rocm-6.0.0/lib/ -o test
I also have the rocm-opencl-sdk
package installed from here, which is the vendor's (AMD's) official page.
Upvotes: 0
Views: 137
Reputation: 23428
I can spot at least 3 issues:
cl_float ausgabe_data[3];
---------------------^^^
versus
clEnqueueReadBuffer(queue, ausgabe_buffer, CL_TRUE, 0, sizeof(cl_float) * 10, &ausgabe_data, 0, NULL, NULL);
-------------------------------------------------------^^^^^^^^^^^^^^^^^^^^^
This is trying to read 10 floats into a buffer that can hold 3.
This may not cause issues depending on your C compiler, but as far as I'm aware it's not strictly correct. When passing the pointer to the copy destination, you have:
…, &ausgabe_data, …
This takes the address of the array itself, not of its first element. Both pointers hold the same address, but their types differ, so it is better to stick to the safe approach. Either remove the ampersand (&
) and rely on the fact that an array automatically decays into a pointer to its 0th element, or explicitly specify the array item at which to start writing (&ausgabe_data[0]
).
Note that the call to clEnqueueWriteBuffer()
with &eingabe_data
has the same issue. The clGetPlatformIDs(16, &platform_ids,
and clGetDeviceIDs(…, &device_ids,
calls have the same problem, and your compiler should be warning you about these latter 2. (clEnqueueWriteBuffer
and clEnqueueReadBuffer
unfortunately are not type safe and accept void*
so your compiler cannot spot mistakes there. All the more reason to double check them manually.)
Don't ignore warnings! Please go and fix any other warnings your compiler is telling you about before posting questions here. If you're not seeing warnings about this, add something like -Wall -Wextra
to your C compiler flags. Then go fix those issues as well as the ones I identified.
The kernel code uses:
y [i] += …
so it reads from y, adds another value to it, then writes the result back. Yet the buffer for y
is created as write-only, and its contents are never initialised:
cl_mem ausgabe_buffer = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY,
---------------------------------------------------^^^^^^^^^^
Either make the buffer read-write and initialise it with sensible initial data (all zeroes?), or change the kernel code to pure assignment. (y [i] = …
)
Upvotes: 1