.NET 4.6 RC x64 is twice as slow as x86 (release version)

Question

Net 4.6 RC x64 is twice as slow as x86 (release version):

Consider this piece of code:

class SpectralNorm
{
    public static void Main(String[] args)
    {
        int n = 5500;
        if (args.Length > 0) n = Int32.Parse(args[0]);

        var spec = new SpectralNorm();
        var watch = Stopwatch.StartNew();
        var res = spec.Approximate(n);

        Console.WriteLine("{0:f9} -- {1}", res, watch.Elapsed.TotalMilliseconds);
    }

    double Approximate(int n)
    {
        // create unit vector
        double[] u = new double[n];
        for (int i = 0; i < n; i++) u[i] = 1;

        // 20 steps of the power method
        double[] v = new double[n];
        for (int i = 0; i < n; i++) v[i] = 0;

        for (int i = 0; i < 10; i++)
        {
            MultiplyAtAv(n, u, v);
            MultiplyAtAv(n, v, u);
        }

        // B=AtA         A multiplied by A transposed
        // v.Bv /(v.v)   eigenvalue of v 
        double vBv = 0, vv = 0;
        for (int i = 0; i < n; i++)
        {
            vBv += u[i] * v[i];
            vv += v[i] * v[i];
        }

        return Math.Sqrt(vBv / vv);
    }


    /* return element i,j of infinite matrix A */
    double A(int i, int j)
    {
        return 1.0 / ((i + j) * (i + j + 1) / 2 + i + 1);
    }

    /* multiply vector v by matrix A */
    void MultiplyAv(int n, double[] v, double[] Av)
    {
        for (int i = 0; i < n; i++)
        {
            Av[i] = 0;
            for (int j = 0; j < n; j++) Av[i] += A(i, j) * v[j];
        }
    }

    /* multiply vector v by matrix A transposed */
    void MultiplyAtv(int n, double[] v, double[] Atv)
    {
        for (int i = 0; i < n; i++)
        {
            Atv[i] = 0;
            for (int j = 0; j < n; j++) Atv[i] += A(j, i) * v[j];
        }
    }

    /* multiply vector v by matrix A and then by matrix A transposed */
    void MultiplyAtAv(int n, double[] v, double[] AtAv)
    {
        double[] u = new double[n];
        MultiplyAv(n, v, u);
        MultiplyAtv(n, u, AtAv);
    }
}

On my machine x86 release version takes 4.5 seconds to complete, while the x64 takes 9.5 seconds. Is there any specific flag/setting needed for the x64?

UPDATE

It turns out that RyuJIT has a role in this issue. If useLegacyJit is enabled in app.config, the result is different and this time x64 is faster.

UPDATE

Now the issue has been reported to the CLR team coreclr, issue 993

.NET 4.6 RC x64 is twice as slow as x86 (release version)

Answers (1)

Related Questions