user366312
user366312

Reputation: 16998

How can I simplify passing types via a generic function?

Please check the supplied source code - I want to simplify this statement such that I don't have to pass so many type parameters to load a kernel:

var kernel = accelerator_.LoadAutoGroupedStreamKernel<Index2D, ArrayView2D<int, Stride2D.DenseX>, ArrayView2D<int, Stride2D.DenseX>, ArrayView2D<int, Stride2D.DenseX>>(RMatrixKernels.AdditionKernel);

I tried to use a wrapper class like this:

/// <summary>
/// Thin holder that carries a kernel around as an untyped <see cref="Delegate"/>.
/// </summary>
public class KernelWrapper
{
    /// <summary>Stores <paramref name="kernel"/> unchanged; no validation is performed.</summary>
    /// <param name="kernel">The delegate to expose through <see cref="Kernel"/>.</param>
    public KernelWrapper(Delegate kernel) => Kernel = kernel;

    /// <summary>The delegate that was handed to the constructor.</summary>
    public Delegate Kernel { get; private set; }
}

But I was unsuccessful.

How can I do it?

Source code:

using ILGPU;
using ILGPU.Runtime;
using System;
using System.Collections.Generic;

/// <summary>
/// Element-wise kernel bodies operating on 2D integer views.
/// Each method handles exactly one element, selected by its <c>index</c> argument.
/// </summary>
public class RMatrixKernels
{
    /// <summary>
    /// Stores <c>matrixA[index] + matrixB[index]</c> into <c>result[index]</c>.
    /// </summary>
    /// <param name="index">Position of the element to process.</param>
    /// <param name="matrixA">First addend view.</param>
    /// <param name="matrixB">Second addend view.</param>
    /// <param name="result">View receiving the sum.</param>
    public static void AdditionKernel(
        Index2D index,
        ArrayView2D<int, Stride2D.DenseX> matrixA,
        ArrayView2D<int, Stride2D.DenseX> matrixB,
        ArrayView2D<int, Stride2D.DenseX> result)
    {
        int left = matrixA[index];
        int right = matrixB[index];
        result[index] = left + right;
    }

    /// <summary>
    /// Stores <c>aView[index] * scalar</c> into <c>cView[index]</c>.
    /// </summary>
    /// <param name="index">Position of the element to process.</param>
    /// <param name="aView">Input view.</param>
    /// <param name="scalar">Factor applied to the input element.</param>
    /// <param name="cView">View receiving the product.</param>
    public static void ScalarMultiplyKernel(
        Index2D index,
        ArrayView2D<int, Stride2D.DenseX> aView,
        int scalar,
        ArrayView2D<int, Stride2D.DenseX> cView)
    {
        int element = aView[index];
        cView[index] = element * scalar;
    }
}

/// <summary>
/// Integer matrix backed by a managed <c>int[,]</c> whose <c>+</c> and scalar <c>*</c>
/// operators are evaluated by launching the kernels in <see cref="RMatrixKernels"/>
/// on a single accelerator shared by all instances.
/// </summary>
public class RMatrix
{
    private readonly int[,] matrix_;

    /// <summary>Number of rows (length of dimension 0 of the backing array).</summary>
    public int Rows { get; private set; }

    /// <summary>Number of columns (length of dimension 1 of the backing array).</summary>
    public int Cols { get; private set; }

    // Created once in the static constructor and reused by every operator call.
    private static readonly Accelerator accelerator_;

    static RMatrix()
    {
        var context = Context.CreateDefault();
        accelerator_ = context.GetPreferredDevice(preferCPU: true).CreateAccelerator(context);
    }

    /// <summary>Creates a zero-initialised matrix with the given dimensions.</summary>
    /// <param name="rows">Number of rows.</param>
    /// <param name="cols">Number of columns.</param>
    public RMatrix(int rows, int cols)
    {
        Rows = rows;
        Cols = cols;
        matrix_ = new int[rows, cols];
    }

    /// <summary>Creates a matrix holding a private copy of <paramref name="arr"/>.</summary>
    /// <param name="arr">Source values; later changes to it do not affect this matrix.</param>
    /// <exception cref="ArgumentNullException"><paramref name="arr"/> is <c>null</c>.</exception>
    public RMatrix(int[,] arr)
    {
        if (arr is null)
        {
            throw new ArgumentNullException(nameof(arr));
        }

        Rows = arr.GetLength(0);
        Cols = arr.GetLength(1);
        matrix_ = new int[Rows, Cols];
        Array.Copy(arr, matrix_, arr.Length);
    }

    /// <summary>Adds two matrices element by element using <see cref="RMatrixKernels.AdditionKernel"/>.</summary>
    /// <param name="a">Left operand.</param>
    /// <param name="b">Right operand; must have the same dimensions as <paramref name="a"/>.</param>
    /// <returns>A new matrix containing the element-wise sum.</returns>
    /// <exception cref="ArgumentNullException">Either operand is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">The operands differ in row or column count.</exception>
    public static RMatrix operator +(RMatrix a, RMatrix b)
    {
        if (a is null)
        {
            throw new ArgumentNullException(nameof(a));
        }

        if (b is null)
        {
            throw new ArgumentNullException(nameof(b));
        }

        if (a.Rows != b.Rows || a.Cols != b.Cols)
        {
            throw new ArgumentException("Matrix dimensions must match for addition.");
        }

        var extent = new Index2D(a.Rows, a.Cols);

        // using-statements guarantee the buffers are released even when a copy,
        // the kernel load/launch or the synchronisation throws.
        using (var deviceMatrixA = accelerator_.Allocate2DDenseX<int>(extent))
        using (var deviceMatrixB = accelerator_.Allocate2DDenseX<int>(extent))
        using (var deviceResult = accelerator_.Allocate2DDenseX<int>(extent))
        {
            deviceMatrixA.CopyFromCPU(a.matrix_);
            deviceMatrixB.CopyFromCPU(b.matrix_);

            var kernel = accelerator_.LoadAutoGroupedStreamKernel<Index2D, ArrayView2D<int, Stride2D.DenseX>, ArrayView2D<int, Stride2D.DenseX>, ArrayView2D<int, Stride2D.DenseX>>(RMatrixKernels.AdditionKernel);

            kernel(extent, deviceMatrixA.View, deviceMatrixB.View, deviceResult.View);

            // Wait for the launch to finish before reading the result back.
            accelerator_.Synchronize();

            int[,] hostResult = new int[a.Rows, a.Cols];
            deviceResult.CopyToCPU(hostResult);

            return new RMatrix(hostResult);
        }
    }

    /// <summary>Multiplies every element by <paramref name="scalar"/> using <see cref="RMatrixKernels.ScalarMultiplyKernel"/>.</summary>
    /// <param name="a">Matrix to scale.</param>
    /// <param name="scalar">Factor applied to each element.</param>
    /// <returns>A new matrix containing the scaled values.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="a"/> is <c>null</c>.</exception>
    public static RMatrix operator *(RMatrix a, int scalar)
    {
        if (a is null)
        {
            throw new ArgumentNullException(nameof(a));
        }

        var extent = new Index2D(a.Rows, a.Cols);

        // using-statements guarantee the buffers are released on every exit path.
        using (var deviceMatrixA = accelerator_.Allocate2DDenseX<int>(extent))
        using (var deviceResult = accelerator_.Allocate2DDenseX<int>(extent))
        {
            deviceMatrixA.CopyFromCPU(a.matrix_);

            var kernel = accelerator_.LoadAutoGroupedStreamKernel<Index2D, ArrayView2D<int, Stride2D.DenseX>, int, ArrayView2D<int, Stride2D.DenseX>>(RMatrixKernels.ScalarMultiplyKernel);

            kernel(extent, deviceMatrixA.View, scalar, deviceResult.View);

            // Wait for the launch to finish before reading the result back.
            accelerator_.Synchronize();

            int[,] hostResult = new int[a.Rows, a.Cols];
            deviceResult.CopyToCPU(hostResult);

            return new RMatrix(hostResult);
        }
    }

    /// <summary>
    /// Writes the matrix to the console, one row per line, with a tab after every value.
    /// </summary>
    public void Show()
    {
        for (int i = 0; i < Rows; i++)
        {
            for (int j = 0; j < Cols; j++)
            {
                Console.Write(matrix_[i, j] + "\t");
            }
            Console.WriteLine();
        }
    }
}

/// <summary>
/// Demo entry point: builds two 3x3 matrices, adds them, scales the first by 10,
/// and prints both results.
/// </summary>
public static class Program
{
    static void Main()
    {
        var a = new RMatrix(new int[,]
        {
            { 1, 2, 3 },
            { 4, 5, 6 },
            { 7, 8, 9 },
        });
        var b = new RMatrix(new int[,]
        {
            { 9, 8, 7 },
            { 6, 5, 4 },
            { 3, 2, 1 },
        });

        // Both results are computed before anything is printed.
        RMatrix sum = a + b;
        RMatrix scaled = a * 10;

        Console.WriteLine("Matrix A + B:");
        sum.Show();

        Console.WriteLine("\nMatrix A * 10:");
        scaled.Show();
    }
}

Upvotes: 0

Views: 47

Answers (1)

Joe Care
Joe Care

Reputation: 128

Hello, here is what I found out: you are calling "LoadAutoGroupedStreamKernel" from the ILGPU package. This is a static extension method that returns an Action<TIndex, T1, T2, T3> delegate as its result, not a class instance. The function looks like this:

 /// <summary>
 /// Loads <paramref name="action"/> through <c>LoadAutoGroupedKernel</c> and wraps the
 /// resulting launcher so that callers do not have to pass a stream themselves.
 /// </summary>
 /// <param name="accelerator">Accelerator the kernel is loaded on (extension-method receiver).</param>
 /// <param name="action">Kernel method taking an index and three struct parameters.</param>
 /// <returns>
 /// A delegate that forwards its arguments to the loaded kernel together with
 /// <c>accelerator.DefaultStream</c>.
 /// </returns>
 public static Action<TIndex, T1, T2, T3> LoadAutoGroupedStreamKernel<TIndex, T1, T2, T3>(
     this Accelerator accelerator,
     Action<TIndex, T1, T2, T3> action)
     where TIndex : struct, IIndex
     where T1 : struct where T2 : struct where T3 : struct
 {
     var baseKernel = accelerator.LoadAutoGroupedKernel<TIndex, T1, T2, T3>(action);
     // The returned lambda captures the accelerator and supplies its default stream on every launch.
     return (TIndex index, T1 param1, T2 param2, T3 param3) =>
         baseKernel(accelerator.DefaultStream, index, param1, param2, param3);
 }

So this method is already the helper that is meant to make things easier. What you can do is extract the calls into dedicated methods for your purpose, like this:

     /// <summary>
     /// Loads <see cref="RMatrixKernels.AdditionKernel"/> on <paramref name="accelerator_"/>
     /// so call sites do not have to repeat the generic type arguments.
     /// </summary>
     /// <param name="accelerator_">Accelerator the kernel is loaded on.</param>
     /// <returns>The launcher delegate returned by <c>LoadAutoGroupedStreamKernel</c>.</returns>
     private static Action<
         Index2D,
         ArrayView2D<int, Stride2D.DenseX>,
         ArrayView2D<int, Stride2D.DenseX>,
         ArrayView2D<int, Stride2D.DenseX>> BuildAdditionKernel(Accelerator accelerator_) =>
         accelerator_.LoadAutoGroupedStreamKernel<
             Index2D,
             ArrayView2D<int, Stride2D.DenseX>,
             ArrayView2D<int, Stride2D.DenseX>,
             ArrayView2D<int, Stride2D.DenseX>>(RMatrixKernels.AdditionKernel);
    /// <summary>
    /// Loads <see cref="RMatrixKernels.ScalarMultiplyKernel"/> on <paramref name="accelerator_"/>
    /// so call sites do not have to repeat the generic type arguments.
    /// </summary>
    /// <param name="accelerator_">Accelerator the kernel is loaded on.</param>
    /// <returns>The launcher delegate returned by <c>LoadAutoGroupedStreamKernel</c>.</returns>
    private static Action<
        Index2D,
        ArrayView2D<int, Stride2D.DenseX>,
        int,
        ArrayView2D<int, Stride2D.DenseX>> BuildSkalarMultiplyKernel(Accelerator accelerator_) =>
        accelerator_.LoadAutoGroupedStreamKernel<
            Index2D,
            ArrayView2D<int, Stride2D.DenseX>,
            int,
            ArrayView2D<int, Stride2D.DenseX>>(RMatrixKernels.ScalarMultiplyKernel);

So that the two functions look like:

    /// <summary>Adds two matrices element by element on the accelerator.</summary>
    /// <param name="a">Left operand.</param>
    /// <param name="b">Right operand; must have the same dimensions as <paramref name="a"/>.</param>
    /// <returns>A new matrix containing the element-wise sum.</returns>
    /// <exception cref="ArgumentNullException">Either operand is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">The operands differ in row or column count.</exception>
    public static RMatrix operator +(RMatrix a, RMatrix b)
    {
        if (a is null)
        {
            throw new ArgumentNullException(nameof(a));
        }

        if (b is null)
        {
            throw new ArgumentNullException(nameof(b));
        }

        if (a.Rows != b.Rows || a.Cols != b.Cols)
        {
            throw new ArgumentException("Matrix dimensions must match for addition.");
        }

        var extent = new Index2D(a.Rows, a.Cols);

        // using-statements guarantee the buffers are released even when a copy,
        // the kernel load/launch or the synchronisation throws.
        using (var deviceMatrixA = accelerator_.Allocate2DDenseX<int>(extent))
        using (var deviceMatrixB = accelerator_.Allocate2DDenseX<int>(extent))
        using (var deviceResult = accelerator_.Allocate2DDenseX<int>(extent))
        {
            deviceMatrixA.CopyFromCPU(a.matrix_);
            deviceMatrixB.CopyFromCPU(b.matrix_);

            var kernel = BuildAdditionKernel(accelerator_);

            kernel(extent, deviceMatrixA.View, deviceMatrixB.View, deviceResult.View);

            // Wait for the launch to finish before reading the result back.
            accelerator_.Synchronize();

            int[,] hostResult = new int[a.Rows, a.Cols];
            deviceResult.CopyToCPU(hostResult);

            return new RMatrix(hostResult);
        }
    }

    /// <summary>Multiplies every element of <paramref name="a"/> by <paramref name="scalar"/> on the accelerator.</summary>
    /// <param name="a">Matrix to scale.</param>
    /// <param name="scalar">Factor applied to each element.</param>
    /// <returns>A new matrix containing the scaled values.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="a"/> is <c>null</c>.</exception>
    public static RMatrix operator *(RMatrix a, int scalar)
    {
        if (a is null)
        {
            throw new ArgumentNullException(nameof(a));
        }

        var extent = new Index2D(a.Rows, a.Cols);

        // using-statements guarantee the buffers are released on every exit path.
        using (var deviceMatrixA = accelerator_.Allocate2DDenseX<int>(extent))
        using (var deviceResult = accelerator_.Allocate2DDenseX<int>(extent))
        {
            deviceMatrixA.CopyFromCPU(a.matrix_);

            var kernel = BuildSkalarMultiplyKernel(accelerator_);

            kernel(extent, deviceMatrixA.View, scalar, deviceResult.View);

            // Wait for the launch to finish before reading the result back.
            accelerator_.Synchronize();

            int[,] hostResult = new int[a.Rows, a.Cols];
            deviceResult.CopyToCPU(hostResult);

            return new RMatrix(hostResult);
        }
    }

The next step would be to make your own helper-class like:

/// <summary>
/// Extension methods on <c>Accelerator</c> that load the <see cref="RMatrixKernels"/> kernels
/// with their generic type arguments already filled in.
/// </summary>
public static class RMatrixExtensions
{
    /// <summary>Loads <see cref="RMatrixKernels.AdditionKernel"/> on the given accelerator.</summary>
    /// <param name="accelerator_">Accelerator the kernel is loaded on.</param>
    /// <returns>The launcher delegate returned by <c>LoadAutoGroupedStreamKernel</c>.</returns>
    public static Action<
        Index2D,
        ArrayView2D<int, Stride2D.DenseX>,
        ArrayView2D<int, Stride2D.DenseX>,
        ArrayView2D<int, Stride2D.DenseX>> BuildAdditionKernel(this Accelerator accelerator_) =>
        accelerator_.LoadAutoGroupedStreamKernel<
            Index2D,
            ArrayView2D<int, Stride2D.DenseX>,
            ArrayView2D<int, Stride2D.DenseX>,
            ArrayView2D<int, Stride2D.DenseX>>(RMatrixKernels.AdditionKernel);

    /// <summary>Loads <see cref="RMatrixKernels.ScalarMultiplyKernel"/> on the given accelerator.</summary>
    /// <param name="accelerator_">Accelerator the kernel is loaded on.</param>
    /// <returns>The launcher delegate returned by <c>LoadAutoGroupedStreamKernel</c>.</returns>
    public static Action<
        Index2D,
        ArrayView2D<int, Stride2D.DenseX>,
        int,
        ArrayView2D<int, Stride2D.DenseX>> BuildSkalarMultiplyKernel(this Accelerator accelerator_) =>
        accelerator_.LoadAutoGroupedStreamKernel<
            Index2D,
            ArrayView2D<int, Stride2D.DenseX>,
            int,
            ArrayView2D<int, Stride2D.DenseX>>(RMatrixKernels.ScalarMultiplyKernel);
}

The `this` modifier on the first parameter turns these into extension methods, which makes it possible for the function calls to look like this:

[...]
     var kernel = accelerator_.BuildAdditionKernel();
[...]
// and
[...]
     var kernel = accelerator_.BuildSkalarMultiplyKernel();
[...]

But this is only a matter of taste, and it hides what is really going on.

Upvotes: 1

Related Questions