Reputation: 1210
I am writing a Ruby c extension to perform linear algebra calculations using the Accelerate
macos
framework. In order to use all the cores available I am also using rb_thread_call_without_gvl
to unlock the global VM lock.
I am not a c expert so please bear with me.
The idea is to create a ruby method that operates on the two input matrices
VALUE matmat_mul(VALUE self, VALUE matrixA, VALUE matrixB)
In this method I create a struct
that I then pass to actual function
void* matmat_mul_nogvl(void* inputMatricesPtr)
You can see from the output that everything is working as expected within the c code, but I am struggling to understand how to return the final matrix (array of arrays) back from the c code. The final matrix is nil
. I think that I am missing the part where I convert matC
back to a Ruby object.
This is my code so far (there are lots of debug printf
to verify that the calculation is working correctly)
#include <stdio.h>
#include <ruby.h>
#include <ruby/thread.h>
#include <time.h>
#include <Accelerate/Accelerate.h>
#include <math.h>
typedef struct {
double *matrix;
int nrows;
int ncols;
}Matrix;
typedef struct {
Matrix A;
Matrix B;
Matrix C;
}Transfer;
void* matmat_mul_nogvl(void* inputMatricesPtr)
{
printf("The input matrix struct is at the address %p\n", inputMatricesPtr);
int i,j;
int cblas_order = 101;
int cblas_transpose = 111;
Transfer inputMatrices;
inputMatrices = *(Transfer *)inputMatricesPtr;
double *matA = inputMatrices.A.matrix;
int rowsA = inputMatrices.A.nrows;
int colsA = inputMatrices.A.ncols;
double *matB = inputMatrices.B.matrix;
int rowsB = inputMatrices.B.nrows;
int colsB = inputMatrices.B.ncols;
double *matC = inputMatrices.C.matrix;
int rowsC = inputMatrices.C.nrows;
int colsC = inputMatrices.C.ncols;
printf("\nIn cblas_dgem\n");
time_t t = time(NULL);
struct tm tm = *localtime(&t);
printf("%d-%02d-%d %02d:%02d:%02d - In cblas_dgem\n", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec);
printf("Matrix A shape: (%d,%d)\n", rowsA, colsA);
printf("Matrix B shape: (%d,%d)\n", rowsB, colsB);
printf("Matrix C shape: (%d,%d)\n\n", rowsC, colsC);
int lda = colsA;
int ldb = colsB;
int ldc = colsC;
cblas_dgemm(cblas_order, cblas_transpose, cblas_transpose, rowsA, colsB, colsA, 1.0, matA, lda, matB, ldb, 1.0, matC, ldc);
for (i=0; i<rowsA; i++)
{
for (j=0; j<colsA; j++)
{
printf("Matrix A Element(%d,%d)=%f\n", i, j, matA[i * colsA + j]);
}
}
for (i=0; i<rowsB; i++)
{
for (j=0; j<colsB; j++)
{
printf("Matrix B Element(%d,%d)=%f\n", i, j, matB[i * colsB + j]);
}
}
for (i=0; i<rowsC; i++)
{
for (j=0; j<colsC; j++)
{
printf("Matrix C Element(%d,%d)=%f\n", i, j, matC[i * colsC + j]);
}
}
return NULL;
}
VALUE matmat_mul(VALUE self, VALUE matrixA, VALUE matrixB)
{
printf("\nIn matmul\n");
time_t t = time(NULL);
struct tm tm = *localtime(&t);
printf("%d-%02d-%d %02d:%02d:%02d In matmul\n", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec);
int rowsA = RARRAY_LEN(matrixA);
VALUE firstElement = rb_ary_entry(matrixA, 0);
int colsA = RARRAY_LEN(firstElement);
printf("Matrix A shape: (%d,%d)\n", rowsA, colsA);
int rowsB = RARRAY_LEN(matrixB);
firstElement = rb_ary_entry(matrixB, 0);
int colsB = RARRAY_LEN(firstElement);
printf("Matrix B shape: (%d,%d)\n", rowsB, colsB);
int i,j;
double *matA = (double *)malloc(rowsA * colsA * sizeof(double));
double *matB = (double *)malloc(rowsB * colsB * sizeof(double));
int rowsC = rowsA;
int colsC = colsB;
printf("Matrix C shape: (%d,%d)\n\n", rowsC, colsC);
double *matC = (double *)malloc(rowsC * colsC * sizeof(double));
VALUE rowA;
for (i=0; i<rowsA; i++)
{
rowA = rb_ary_entry(matrixA, i);
for (j=0; j<colsA; j++)
{
matA[i * colsA + j] = NUM2DBL(rb_ary_entry( rowA, j));
printf("Matrix A Element(%d,%d)=%f\n", i, j, matA[i * colsA + j]);
}
}
printf("\n");
VALUE rowB;
for (i=0; i<rowsB; i++)
{
rowB = rb_ary_entry(matrixB, i);
for (j=0; j<colsB; j++)
{
matB[i * colsB + j] = NUM2DBL(rb_ary_entry( rowB, j));
printf("Matrix B Element(%d,%d)=%f\n", i, j, matB[i * colsB + j]);
}
}
printf("\nBefore MatMul Matrix C is:\n");
for (i=0; i<rowsC; i++)
{
for (j=0; j<colsC; j++)
{
matC[i * colsC + j] = 0.0;
printf("Matrix C Element(%d,%d)=%f\n", i, j, matC[i * colsC + j]);
}
}
printf("\n");
Matrix inputMatrixA = {matA, rowsA, colsA};
Matrix inputMatrixB = {matB, rowsB, colsB};
Matrix inputMatrixC = {matC, rowsC, colsC};
Transfer inputMatrices = {inputMatrixA, inputMatrixB, inputMatrixC};
rb_thread_call_without_gvl(matmat_mul_nogvl, &inputMatrices, NULL, NULL);
printf("\nBack in MatMul Matrix C is:\n");
for (i=0; i<rowsC; i++)
{
for (j=0; j<colsC; j++)
{
printf("Matrix C Element(%d,%d)=%f\n", i, j, matC[i * colsC + j]);
}
}
free(matA);
free(matB);
return Qnil;
}
void Init_blasnogvl()
{
VALUE rg = rb_define_module("RG");
VALUE linalg = rb_define_module_under(rg, "LinearAlgebra");
VALUE operation = rb_define_class_under(linalg, "Operation", rb_cObject);
rb_define_method(operation, "matmat_mul", matmat_mul, 2);
}
You can compile it using the following extconf.rb
require 'mkmf'
extension_name = 'blasnogvl'
have_framework('Accelerate')
create_makefile(extension_name)
And test it with the following ruby code
require './blasnogvl'
puts "#{Time.now} - Started"
rows = 4
cols = 3
mat = Array.new(rows){Array.new(cols){rand}}
puts "#{Time.now} - Matrix generated"
mat[0] = [0.0, 1.0, 2.0]
mat[1] = [3.0, 4.0, 5.0]
mat[2] = [6.0, 7.0, 8.0]
mat[3] = [6.0, 7.0, 8.0]
puts mat.to_s
matA = mat
matB = mat.transpose
operation = RG::LinearAlgebra::Operation.new
matC = operation.matmat_mul(matA, matB)
puts "After calculation matA is"
puts matA.to_s
puts "After calculation matB is"
puts matB.to_s
puts "matC in ruby is"
puts matC.to_s
puts "#{Time.now} - Matrix calculated"
Upvotes: 0
Views: 303
Reputation: 341
Final value of your matmat_mul
function calls return Qnil
, this is basically return nil
in ruby.
So you need to create ruby array for matrix, create an array to hold each line of data, populate that lines and push them into the result array.
First could be achieved by rb_ary_new
, but since the dimensions are already known we can give ruby a hint what its size will be using rb_ary_new_capa
, and then populate the values with rb_ary_push
which is an implementation for Array#<<
method.
The following code should do:
// Multiplication code above ...
VALUE matrixC = rb_ary_new_capa(rowsC);
for (i = 0; i < rowsC; i++) {
VALUE rowC = rb_ary_new_capa(colsC);
for (j = 0; j < colsC; j++)
rb_ary_push(rowC, DBL2NUM(matC[i * colsC + j]);
rb_ary_push(matrixC, rowC);
}
return matrixC;
Upvotes: 2