Reputation: 145
I am new to OpenCL and trying to run my first program. I have an AMD Radeon graphics card. The program tries to compute matrix and vector multiplication of size 4. I have two files matvec.c and matvec.cl. Below is the code for matvec.c
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
#define _CRT_SECURE_NO_WARNINGS
#define PROGRAM_FILE "..\\matvec\\matvec.cl"
#define KERNEL_FUNC "matvec_mult"
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#ifdef MAC
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
/*
 * Host program: fills a 16-element matrix and a 4-element vector, computes
 * the reference matrix-vector product on the CPU, then loads the OpenCL
 * source in PROGRAM_FILE, builds it, runs the KERNEL_FUNC kernel with 4
 * work-items on a GPU device, reads the 4 results back and prints whether
 * they match the CPU reference.
 *
 * Returns 0 at the end of the run; calls exit(1) when a platform, device,
 * context, program file, program object or build step fails.
 */
int main() {
cl_platform_id platform;
cl_device_id device;
cl_context context;
cl_command_queue queue;
cl_int i, err;
cl_program program;
FILE *program_handle;
char *program_buffer, *program_log;
size_t program_size, log_size;
cl_kernel kernel;
size_t work_units_per_kernel;
float mat[16], vec[4], result[4];
float correct[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
cl_mem mat_buff, vec_buff, res_buff;
/* Input matrix: element i holds 2*i. */
for (i = 0; i<16; i++) {
mat[i] = i * 2.0f;
}
/* Input vector: element i holds 3*i. The same loop accumulates the CPU
   reference result, treating mat as 4 consecutive rows of 4 floats. */
for (i = 0; i<4; i++) {
vec[i] = i * 3.0f;
correct[0] += mat[i] * vec[i];
correct[1] += mat[i + 4] * vec[i];
correct[2] += mat[i + 8] * vec[i];
correct[3] += mat[i + 12] * vec[i];
}
/* Identify a platform */
/* NOTE(review): here and below, perror() reports errno while the OpenCL
   status is returned in err -- the printed reason may be unrelated to the
   actual OpenCL failure; confirm and consider printing err instead. */
err = clGetPlatformIDs(1, &platform, NULL);
if (err < 0) {
perror("Couldn't find any platforms");
exit(1);
}
printf("Identified platform");
/* Access a device */
/* Only a single GPU-type device is requested. */
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
if (err < 0) {
perror("Couldn't find any devices");
exit(1);
}
printf("\nAccessed device");
/* Create the context */
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
if (err < 0) {
perror("Couldn't create a context");
exit(1);
}
printf("\nCreated context");
/* Read program file and place content into buffer */
/* NOTE(review): the file is opened in text mode ("r"). On platforms that
   translate line endings on read (e.g. CRLF -> LF on Windows), fread() can
   deliver fewer bytes than the size measured with fseek/ftell below, which
   leaves the tail of program_buffer unwritten. Opening with "rb" avoids the
   translation. */
program_handle = fopen(PROGRAM_FILE, "r");
if (program_handle == NULL) {
perror("Couldn't find the program file");
exit(1);
}
/* This message is printed before the file content is actually read. */
printf("\nRead program and placed in buffer");
/* Measure the file size by seeking to the end, then rewind to read it. */
fseek(program_handle, 0, SEEK_END);
program_size = ftell(program_handle);
rewind(program_handle);
/* One extra byte for the terminating '\0'. The malloc() result is not
   checked before use. */
program_buffer = (char*)malloc(program_size + 1);
program_buffer[program_size] = '\0';
/* The number of items actually read (fread's return value) is discarded. */
fread(program_buffer, sizeof(char), program_size,
program_handle);
fclose(program_handle);
printf("\nAfter fseek and fclose block");
/* Create program from file */
/* program_size (the ftell value) is passed as the length of the source
   string, not the number of bytes fread() returned. */
program = clCreateProgramWithSource(context, 1,
(const char**)&program_buffer, &program_size, &err);
if (err < 0) {
perror("Couldn't create the program");
exit(1);
}
free(program_buffer);
printf("\nAfter creating program");
/* Build program */
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err < 0) {
/* Find size of log and print to std output */
/* First call only queries the log length; second call fetches the text. */
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
0, NULL, &log_size);
program_log = (char*)malloc(log_size + 1);
program_log[log_size] = '\0';
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
log_size + 1, program_log, NULL);
printf("%s\n", program_log);
free(program_log);
exit(1);
}
printf("Built program");
/* Create the kernel object, the command queue and the device buffers.
   err is overwritten by each call and never inspected from here on. */
kernel = clCreateKernel(program, KERNEL_FUNC, &err);
queue = clCreateCommandQueue(context, device, 0, &err);
/* Input buffers are initialised from the host arrays at creation time
   (CL_MEM_COPY_HOST_PTR); the result buffer is created without host data. */
mat_buff = clCreateBuffer(context, CL_MEM_READ_ONLY |
CL_MEM_COPY_HOST_PTR, sizeof(float) * 16, mat, &err);
vec_buff = clCreateBuffer(context, CL_MEM_READ_ONLY |
CL_MEM_COPY_HOST_PTR, sizeof(float) * 4, vec, &err);
res_buff = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(float) * 4, NULL, &err);
/* Kernel arguments in order: matrix, vector, result. */
clSetKernelArg(kernel, 0, sizeof(cl_mem), &mat_buff);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &vec_buff);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &res_buff);
/* One-dimensional launch with a global size of 4; the local size is left
   as NULL. */
work_units_per_kernel = 4;
clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
&work_units_per_kernel, NULL, 0, NULL, NULL);
/* CL_TRUE requests a blocking read, so result is filled when this call
   returns. */
clEnqueueReadBuffer(queue, res_buff, CL_TRUE, 0,
sizeof(float) * 4, result, 0, NULL, NULL);
/* Device and CPU results are compared with exact floating-point equality. */
if ((result[0] == correct[0]) && (result[1] == correct[1])
&& (result[2] == correct[2]) && (result[3] == correct[3])) {
printf("\nMatrix-vector multiplication successful.\n");
}
else {
printf("\nMatrix-vector multiplication unsuccessful.\n");
}
/* Release the OpenCL objects created above. */
clReleaseMemObject(mat_buff);
clReleaseMemObject(vec_buff);
clReleaseMemObject(res_buff);
clReleaseKernel(kernel);
clReleaseCommandQueue(queue);
clReleaseProgram(program);
clReleaseContext(context);
/* Waits for a character on stdin before returning -- presumably to keep
   the console window open. */
getchar();
return 0;
}
and the code for matvec.cl is as follows
__kernel void matvec_mult(__global float4* matrix, __global float4* vector,
                          __global float* result) { /* each work-item produces one element of result */
    const int row = get_global_id(0);               /* index of the float4 row handled by this work-item */
    result[row] = dot(matrix[row], vector[0]);      /* dot product of that row with the first float4 of vector */
}
When I try to execute it, I get the following error
Identified platform
Accessed device
Created context
Read program and placed in buffer
After fseek and fclose block
After creating program
"C:\Users\<user>\AppData\Local\Temp\OCL3099.tmp.cl", line 5: error:
unrecognized
token
}════
^
"C:\Users\<user>\AppData\Local\Temp\OCL3099.tmp.cl", line 5: error: expected a
declaration
}════
^
"C:\Users\<user>\AppData\Local\Temp\OCL3099.tmp.cl", line 5: error:
unrecognized
token
}════
^
"C:\Users\<user>\AppData\Local\Temp\OCL3099.tmp.cl", line 5: error:
unrecognized
token
}════
^
"C:\Users\<user>\AppData\Local\Temp\OCL3099.tmp.cl", line 5: error:
unrecognized
token
}════
^
5 errors detected in the compilation of
"C:\Users\<user>\AppData\Local\Temp\OCL3099.tmp.cl".
Frontend phase failed compilation.
This tells me that clBuildProgram is hitting compilation errors, but I do not see anything wrong in matvec.cl at the location the errors point to. What could be the problem? I would appreciate it if anyone could point me in the right direction.
Upvotes: 0
Views: 285
Reputation: 74
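Try opening the kernel file as a binary file with fopen(PROGRAM_FILE, "rb"); instead of opening it as a text file with fopen(PROGRAM_FILE, "r");. Otherwise program_size gives you 175 for your file while fread() returns 170, which is the file size excluding the 5 line-ending bytes. This happens because fread() converts each two-byte CRLF into a single-byte LF for text files. The last 5 bytes of program_buffer are therefore never written, yet clCreateProgramWithSource() is still told that the source is program_size bytes long, so the compiler sees uninitialized bytes after the closing brace. Your numbers may differ, but the behaviour is the same according to your logs.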
You can always check your code by printing program_buffer before passing it to clCreateProgramWithSource().
Upvotes: 0