Reputation: 16998
Please check the supplied source code - I want to simplify this statement such that I don't have to pass so many type parameters to load a kernel:
var kernel = accelerator_.LoadAutoGroupedStreamKernel<Index2D, ArrayView2D<int, Stride2D.DenseX>, ArrayView2D<int, Stride2D.DenseX>, ArrayView2D<int, Stride2D.DenseX>>(RMatrixKernels.AdditionKernel);
I tried to use a wrapper class like this:
/// <summary>
/// Holds a reference to a kernel delegate of any signature.
/// </summary>
public class KernelWrapper
{
    /// <summary>The delegate that was handed to the constructor.</summary>
    public Delegate Kernel { get; private set; }

    /// <summary>
    /// Stores <paramref name="kernel"/> as-is; no validation is performed.
    /// </summary>
    /// <param name="kernel">The delegate to wrap.</param>
    public KernelWrapper(Delegate kernel) => Kernel = kernel;
}
But I was unsuccessful.
How can I do it?
Source code:
using ILGPU;
using ILGPU.Runtime;
using System;
using System.Collections.Generic;
/// <summary>
/// Element-wise kernels over 2D integer array views.
/// </summary>
public class RMatrixKernels
{
    /// <summary>
    /// Stores <c>matrixA[index] + matrixB[index]</c> into <c>result[index]</c>.
    /// </summary>
    /// <param name="index">The element position handled by this invocation.</param>
    /// <param name="matrixA">First operand.</param>
    /// <param name="matrixB">Second operand.</param>
    /// <param name="result">View that receives the sum.</param>
    public static void AdditionKernel(
        Index2D index,
        ArrayView2D<int, Stride2D.DenseX> matrixA,
        ArrayView2D<int, Stride2D.DenseX> matrixB,
        ArrayView2D<int, Stride2D.DenseX> result)
    {
        int sum = matrixA[index] + matrixB[index];
        result[index] = sum;
    }

    /// <summary>
    /// Stores <c>aView[index] * scalar</c> into <c>cView[index]</c>.
    /// </summary>
    /// <param name="index">The element position handled by this invocation.</param>
    /// <param name="aView">Input view.</param>
    /// <param name="scalar">Factor applied to the input element.</param>
    /// <param name="cView">View that receives the product.</param>
    public static void ScalarMultiplyKernel(
        Index2D index,
        ArrayView2D<int, Stride2D.DenseX> aView,
        int scalar,
        ArrayView2D<int, Stride2D.DenseX> cView)
    {
        int scaled = aView[index] * scalar;
        cView[index] = scaled;
    }
}
/// <summary>
/// Integer matrix whose addition and scalar multiplication are executed
/// through ILGPU kernels on a shared accelerator.
/// </summary>
public class RMatrix
{
    private int[,] matrix_;

    /// <summary>Number of rows (length of dimension 0 of the backing array).</summary>
    public int Rows { get; private set; }

    /// <summary>Number of columns (length of dimension 1 of the backing array).</summary>
    public int Cols { get; private set; }

    private static readonly Accelerator accelerator_;

    // The kernels are loaded once in the static constructor and reused by every
    // operator call, so the verbose generic signature is spelled out in one
    // place only and the kernels are not re-loaded on each operation.
    private static readonly Action<
        Index2D,
        ArrayView2D<int, Stride2D.DenseX>,
        ArrayView2D<int, Stride2D.DenseX>,
        ArrayView2D<int, Stride2D.DenseX>> additionKernel_;

    private static readonly Action<
        Index2D,
        ArrayView2D<int, Stride2D.DenseX>,
        int,
        ArrayView2D<int, Stride2D.DenseX>> scalarMultiplyKernel_;

    /// <summary>
    /// Creates the default context and an accelerator for the preferred device
    /// (CPU preferred), then loads both kernels on it.
    /// </summary>
    static RMatrix()
    {
        var context_ = Context.CreateDefault();
        accelerator_ = context_.GetPreferredDevice(preferCPU: true).CreateAccelerator(context_);

        additionKernel_ = accelerator_.LoadAutoGroupedStreamKernel<
            Index2D,
            ArrayView2D<int, Stride2D.DenseX>,
            ArrayView2D<int, Stride2D.DenseX>,
            ArrayView2D<int, Stride2D.DenseX>>(RMatrixKernels.AdditionKernel);

        scalarMultiplyKernel_ = accelerator_.LoadAutoGroupedStreamKernel<
            Index2D,
            ArrayView2D<int, Stride2D.DenseX>,
            int,
            ArrayView2D<int, Stride2D.DenseX>>(RMatrixKernels.ScalarMultiplyKernel);
    }

    /// <summary>
    /// Creates a zero-filled matrix with the given dimensions.
    /// </summary>
    /// <param name="rows">Number of rows.</param>
    /// <param name="cols">Number of columns.</param>
    public RMatrix(int rows, int cols)
    {
        Rows = rows;
        Cols = cols;
        matrix_ = new int[rows, cols];
    }

    /// <summary>
    /// Creates a matrix holding a copy of <paramref name="arr"/>.
    /// </summary>
    /// <param name="arr">Source values; later changes to it do not affect this matrix.</param>
    /// <exception cref="ArgumentNullException"><paramref name="arr"/> is null.</exception>
    public RMatrix(int[,] arr)
    {
        if (arr == null)
            throw new ArgumentNullException(nameof(arr));

        Rows = arr.GetLength(0);
        Cols = arr.GetLength(1);
        matrix_ = new int[Rows, Cols];
        Array.Copy(arr, matrix_, arr.Length);
    }

    /// <summary>
    /// Adds two matrices element by element.
    /// </summary>
    /// <returns>A new matrix containing the element-wise sum.</returns>
    /// <exception cref="ArgumentNullException">Either operand is null.</exception>
    /// <exception cref="ArgumentException">The operands differ in size.</exception>
    public static RMatrix operator +(RMatrix a, RMatrix b)
    {
        if (a == null)
            throw new ArgumentNullException(nameof(a));
        if (b == null)
            throw new ArgumentNullException(nameof(b));
        if (a.Rows != b.Rows || a.Cols != b.Cols)
            throw new ArgumentException("Matrix dimensions must match for addition.");

        // An empty matrix has no elements to compute, so skip the device entirely.
        if (a.Rows == 0 || a.Cols == 0)
            return new RMatrix(a.Rows, a.Cols);

        var extent = new Index2D(a.Rows, a.Cols);
        int[,] hostResult = new int[a.Rows, a.Cols];

        // The using blocks release the device buffers even when a copy,
        // the kernel launch, or the synchronization throws.
        using (var deviceMatrixA = accelerator_.Allocate2DDenseX<int>(extent))
        using (var deviceMatrixB = accelerator_.Allocate2DDenseX<int>(extent))
        using (var deviceResult = accelerator_.Allocate2DDenseX<int>(extent))
        {
            deviceMatrixA.CopyFromCPU(a.matrix_);
            deviceMatrixB.CopyFromCPU(b.matrix_);

            additionKernel_(extent, deviceMatrixA.View, deviceMatrixB.View, deviceResult.View);
            accelerator_.Synchronize();

            deviceResult.CopyToCPU(hostResult);
        }

        return new RMatrix(hostResult);
    }

    /// <summary>
    /// Multiplies every element of <paramref name="a"/> by <paramref name="scalar"/>.
    /// </summary>
    /// <returns>A new matrix containing the scaled elements.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="a"/> is null.</exception>
    public static RMatrix operator *(RMatrix a, int scalar)
    {
        if (a == null)
            throw new ArgumentNullException(nameof(a));

        // An empty matrix has no elements to compute, so skip the device entirely.
        if (a.Rows == 0 || a.Cols == 0)
            return new RMatrix(a.Rows, a.Cols);

        var extent = new Index2D(a.Rows, a.Cols);
        int[,] hostResult = new int[a.Rows, a.Cols];

        // The using blocks release the device buffers even when a copy,
        // the kernel launch, or the synchronization throws.
        using (var deviceMatrixA = accelerator_.Allocate2DDenseX<int>(extent))
        using (var deviceResult = accelerator_.Allocate2DDenseX<int>(extent))
        {
            deviceMatrixA.CopyFromCPU(a.matrix_);

            scalarMultiplyKernel_(extent, deviceMatrixA.View, scalar, deviceResult.View);
            accelerator_.Synchronize();

            deviceResult.CopyToCPU(hostResult);
        }

        return new RMatrix(hostResult);
    }

    /// <summary>
    /// Writes the matrix to the console, one row per line, with a tab after each element.
    /// </summary>
    public void Show()
    {
        for (int i = 0; i < Rows; i++)
        {
            for (int j = 0; j < Cols; j++)
            {
                Console.Write(matrix_[i, j] + "\t");
            }
            Console.WriteLine();
        }
    }
}
/// <summary>
/// Console demo that adds two 3x3 matrices and scales one of them by 10.
/// </summary>
public static class Program
{
    /// <summary>
    /// Builds the two sample matrices, computes the sum and the scaled matrix,
    /// and prints both results.
    /// </summary>
    static void Main()
    {
        var left = new RMatrix(new[,]
        {
            { 1, 2, 3 },
            { 4, 5, 6 },
            { 7, 8, 9 }
        });

        var right = new RMatrix(new[,]
        {
            { 9, 8, 7 },
            { 6, 5, 4 },
            { 3, 2, 1 }
        });

        RMatrix sum = left + right;
        RMatrix scaled = left * 10;

        Console.WriteLine("Matrix A + B:");
        sum.Show();

        Console.WriteLine("\nMatrix A * 10:");
        scaled.Show();
    }
}
Upvotes: 0
Views: 47
Reputation: 128
Hello, here is what I found out: you are calling "LoadAutoGroupedStreamKernel" from the ILGPU package. This is a static helper method that returns a delegate of type Action<TIndex, T1, T2, T3> as its result, not an instance of a class. The method looks like this:
/// <summary>
/// Loads <paramref name="action"/> as an auto-grouped kernel on
/// <paramref name="accelerator"/> and wraps it so that callers do not have to
/// pass a stream themselves.
/// </summary>
/// <param name="accelerator">The accelerator the kernel is loaded on.</param>
/// <param name="action">The kernel method to load.</param>
/// <returns>
/// A delegate that launches the loaded kernel on <c>accelerator.DefaultStream</c>
/// with the given index and three parameters.
/// </returns>
public static Action<TIndex, T1, T2, T3> LoadAutoGroupedStreamKernel<TIndex, T1, T2, T3>(
this Accelerator accelerator,
Action<TIndex, T1, T2, T3> action)
where TIndex : struct, IIndex
where T1 : struct where T2 : struct where T3 : struct
{
// Load the kernel in the form that takes the stream as its first argument.
var baseKernel = accelerator.LoadAutoGroupedKernel<TIndex, T1, T2, T3>(action);
// Bind the accelerator's default stream so the returned delegate only needs
// the index and the kernel parameters.
return (TIndex index, T1 param1, T2 param2, T3 param3) =>
baseKernel(accelerator.DefaultStream, index, param1, param2, param3);
}
So this method is already the convenience helper that makes things easier. What you can do is move each call into its own small method, like this:
/// <summary>
/// Loads <c>RMatrixKernels.AdditionKernel</c> on the given accelerator.
/// </summary>
/// <param name="accelerator_">The accelerator the kernel is loaded on.</param>
/// <returns>The delegate returned by <c>LoadAutoGroupedStreamKernel</c>.</returns>
private static Action<
    Index2D,
    ArrayView2D<int, Stride2D.DenseX>,
    ArrayView2D<int, Stride2D.DenseX>,
    ArrayView2D<int, Stride2D.DenseX>> BuildAdditionKernel(Accelerator accelerator_) =>
    accelerator_.LoadAutoGroupedStreamKernel<
        Index2D,
        ArrayView2D<int, Stride2D.DenseX>,
        ArrayView2D<int, Stride2D.DenseX>,
        ArrayView2D<int, Stride2D.DenseX>>(RMatrixKernels.AdditionKernel);
/// <summary>
/// Loads <c>RMatrixKernels.ScalarMultiplyKernel</c> on the given accelerator.
/// </summary>
/// <param name="accelerator_">The accelerator the kernel is loaded on.</param>
/// <returns>The delegate returned by <c>LoadAutoGroupedStreamKernel</c>.</returns>
private static Action<
    Index2D,
    ArrayView2D<int, Stride2D.DenseX>,
    int,
    ArrayView2D<int, Stride2D.DenseX>> BuildSkalarMultiplyKernel(Accelerator accelerator_) =>
    accelerator_.LoadAutoGroupedStreamKernel<
        Index2D,
        ArrayView2D<int, Stride2D.DenseX>,
        int,
        ArrayView2D<int, Stride2D.DenseX>>(RMatrixKernels.ScalarMultiplyKernel);
So that the two functions look like:
/// <summary>
/// Adds two matrices element by element.
/// </summary>
/// <returns>A new matrix containing the element-wise sum.</returns>
/// <exception cref="ArgumentNullException">Either operand is null.</exception>
/// <exception cref="ArgumentException">The operands differ in size.</exception>
public static RMatrix operator +(RMatrix a, RMatrix b)
{
    if (a == null)
        throw new ArgumentNullException(nameof(a));
    if (b == null)
        throw new ArgumentNullException(nameof(b));
    if (a.Rows != b.Rows || a.Cols != b.Cols)
        throw new ArgumentException("Matrix dimensions must match for addition.");

    // An empty matrix has no elements to compute, so skip the device entirely.
    if (a.Rows == 0 || a.Cols == 0)
        return new RMatrix(a.Rows, a.Cols);

    var extent = new Index2D(a.Rows, a.Cols);
    int[,] hostResult = new int[a.Rows, a.Cols];

    // The using blocks release the device buffers even when a copy,
    // the kernel launch, or the synchronization throws.
    using (var deviceMatrixA = accelerator_.Allocate2DDenseX<int>(extent))
    using (var deviceMatrixB = accelerator_.Allocate2DDenseX<int>(extent))
    using (var deviceResult = accelerator_.Allocate2DDenseX<int>(extent))
    {
        deviceMatrixA.CopyFromCPU(a.matrix_);
        deviceMatrixB.CopyFromCPU(b.matrix_);

        var kernel = BuildAdditionKernel(accelerator_);
        kernel(extent, deviceMatrixA.View, deviceMatrixB.View, deviceResult.View);
        accelerator_.Synchronize();

        deviceResult.CopyToCPU(hostResult);
    }

    return new RMatrix(hostResult);
}
/// <summary>
/// Multiplies every element of <paramref name="a"/> by <paramref name="scalar"/>.
/// </summary>
/// <returns>A new matrix containing the scaled elements.</returns>
/// <exception cref="ArgumentNullException"><paramref name="a"/> is null.</exception>
public static RMatrix operator *(RMatrix a, int scalar)
{
    if (a == null)
        throw new ArgumentNullException(nameof(a));

    // An empty matrix has no elements to compute, so skip the device entirely.
    if (a.Rows == 0 || a.Cols == 0)
        return new RMatrix(a.Rows, a.Cols);

    var extent = new Index2D(a.Rows, a.Cols);
    int[,] hostResult = new int[a.Rows, a.Cols];

    // The using blocks release the device buffers even when a copy,
    // the kernel launch, or the synchronization throws.
    using (var deviceMatrixA = accelerator_.Allocate2DDenseX<int>(extent))
    using (var deviceResult = accelerator_.Allocate2DDenseX<int>(extent))
    {
        deviceMatrixA.CopyFromCPU(a.matrix_);

        var kernel = BuildSkalarMultiplyKernel(accelerator_);
        kernel(extent, deviceMatrixA.View, scalar, deviceResult.View);
        accelerator_.Synchronize();

        deviceResult.CopyToCPU(hostResult);
    }

    return new RMatrix(hostResult);
}
The next step would be to move these methods into your own static helper class, like this:
/// <summary>
/// Extension methods on <c>Accelerator</c> that load the matrix kernels
/// without repeating their generic type arguments at the call site.
/// </summary>
public static class RMatrixExtensions
{
    /// <summary>
    /// Loads <c>RMatrixKernels.AdditionKernel</c> on the given accelerator.
    /// </summary>
    /// <param name="accelerator_">The accelerator the kernel is loaded on.</param>
    /// <returns>The delegate returned by <c>LoadAutoGroupedStreamKernel</c>.</returns>
    public static Action<
        Index2D,
        ArrayView2D<int, Stride2D.DenseX>,
        ArrayView2D<int, Stride2D.DenseX>,
        ArrayView2D<int, Stride2D.DenseX>> BuildAdditionKernel(this Accelerator accelerator_) =>
        accelerator_.LoadAutoGroupedStreamKernel<
            Index2D,
            ArrayView2D<int, Stride2D.DenseX>,
            ArrayView2D<int, Stride2D.DenseX>,
            ArrayView2D<int, Stride2D.DenseX>>(RMatrixKernels.AdditionKernel);

    /// <summary>
    /// Loads <c>RMatrixKernels.ScalarMultiplyKernel</c> on the given accelerator.
    /// </summary>
    /// <param name="accelerator_">The accelerator the kernel is loaded on.</param>
    /// <returns>The delegate returned by <c>LoadAutoGroupedStreamKernel</c>.</returns>
    public static Action<
        Index2D,
        ArrayView2D<int, Stride2D.DenseX>,
        int,
        ArrayView2D<int, Stride2D.DenseX>> BuildSkalarMultiplyKernel(this Accelerator accelerator_) =>
        accelerator_.LoadAutoGroupedStreamKernel<
            Index2D,
            ArrayView2D<int, Stride2D.DenseX>,
            int,
            ArrayView2D<int, Stride2D.DenseX>>(RMatrixKernels.ScalarMultiplyKernel);
}
The `this` modifier on the first parameter turns these into extension methods, so the calls can look like this:
[...]
var kernel = accelerator_.BuildAdditionKernel();
[...]
// and
[...]
var kernel = accelerator_.BuildSkalarMultiplyKernel();
[...]
But this is only a matter of taste. And you hide what's really going on.
Upvotes: 1