HiroIshida
HiroIshida

Reputation: 1603

loop with array access is slow in Julia

I did a comparison between a loop with and without array access as below and found that the performance difference between the two was huge: 1.463677[sec] vs 0.086808[sec].

Could you explain how to improve my code with array access and why this happens?

@inline dist2(p, q) = sqrt((p[1]-q[1])^2+(p[2]-q[2])^2)
function rand_gen()
    r2set = Array[]
    for i=1:10000
        r2_add = rand(2, 1)
        push!(r2set, r2_add)
    end
    return r2set
end

function test()
    N = 10000
    r2set = rand_gen()
    a = [1 1]
    b = [2 2]

    @time for i=1:N, j=1:N
        dist2(r2set[i], r2set[j])
    end

    @time for i=1:N, j=1:N
        dist2(a, b)
    end
end
test()

Upvotes: 0

Views: 1127

Answers (1)

Bogumił Kamiński
Bogumił Kamiński

Reputation: 69869

Make r2set have a concrete type like this (see also https://docs.julialang.org/en/latest/manual/performance-tips/#Avoid-containers-with-abstract-type-parameters-1):

@inline dist2(p, q) = sqrt((p[1]-q[1])^2+(p[2]-q[2])^2)
function rand_gen()
    r2set = Matrix{Float64}[]
    for i=1:10000
        r2_add = rand(2, 1)
        push!(r2set, r2_add)
    end
    return r2set
end

function test()
    N = 10000
    r2set = rand_gen()
    a = [1 1]
    b = [2 2]

    @time for i=1:N, j=1:N
        dist2(r2set[i], r2set[j])
    end

    @time for i=1:N, j=1:N
        dist2(a, b)
    end
end
test()

And now the tests are:

julia> test()
  0.347000 seconds
  0.147696 seconds

which is already better.

Now if you really want speed use immutable type, e.g. Tuple not an array like this:

@inline dist2(p, q) = sqrt((p[1]-q[1])^2+(p[2]-q[2])^2)
function rand_gen()
    r2set = Tuple{Float64,Float64}[]
    for i=1:10000
        r2_add = (rand(), rand())
        push!(r2set, r2_add)
    end
    return r2set
end

function test()
    N = 10000
    r2set = rand_gen()
    a = (1,1)
    b = (2,2)

    s = 0.0
    @time for i=1:N, j=1:N
        @inbounds s += dist2(r2set[i], r2set[j])
    end

    @time for i=1:N, j=1:N
        s += dist2(a, b)
    end
end
test()

And you will comparable speed of both:

julia> test()
  0.038901 seconds
  0.039666 seconds

julia> test()
  0.041379 seconds
  0.039910 seconds

Note that I have added an addition of s because without it Julia optimized out the loop by noticing that it does not do any work.

The key is that if you store arrays in an array then the outer array holds pointers to inner arrays while with immutable types the data is stored directly.

Upvotes: 5

Related Questions