AboAmmar
AboAmmar

Reputation: 5559

Julia Code optimization

I have the following code from a previous question and I need help optimizing the code for speed. This is the code:

"""
    OfdmSym(n::Integer = 1000)

Generate `n` random OFDM symbols and return them as an `n`×64
`Matrix{Complex{Float64}}`, one symbol per row.

Each row is laid out as:

- columns 1:10, 12:21, 23:32, 34:43 and 45:52 hold the 48 data carriers, each
  drawn uniformly from the four points `±1 ± im`;
- columns 11, 22, 33 and 44 hold the pilots (all equal to one);
- columns 53:64 hold the unused carriers (all zero).
"""
function OfdmSym(n::Integer = 1000)
    symbol = ones(Complex{Float64}, n, 64)
    data   = ones(Complex{Float64}, 48)
    unused = zeros(Complex{Float64}, 12)
    pilot  = ones(Complex{Float64}, 4)
    s      = Complex{Float64}[-1-im, -1+im, 1-im, 1+im]

    for i in 1:n
        # Draw 48 fresh data carriers for this symbol.
        for j in eachindex(data)
            data[j] = rand(s)
        end
        # Vectors are joined with `vcat`: `data[1, 1:10]` on a 1×48 matrix is a
        # vector on current Julia, so horizontal concatenation with the scalar
        # pilots would throw. Each slice and the `vcat` allocate a temporary.
        symbol[i, :] = vcat(
            data[1:10],  pilot[1],
            data[11:20], pilot[2],
            data[21:30], pilot[3],
            data[31:40], pilot[4],
            data[41:48], unused
        )
    end
    return symbol
end
OfdmSym()

I appreciate your help.

Upvotes: 1

Views: 405

Answers (2)

rickhg12hs
rickhg12hs

Reputation: 11912

It's all just bits, right? This isn't clean (at all), but it runs slightly faster on my machine (which is much slower than yours so I won't bother posting my times). Is it a little faster on your machine?

"""
    my_OfdmSym(n::Integer = 100000)

Generate `n` random OFDM symbols by manipulating IEEE-754 bit patterns directly
and return them as an `n`×64 complex matrix (a reinterpreted view of the
underlying `UInt64` storage).

Every `Float64` slot starts as the bit pattern of `1.0`; for the data carriers
(columns 1:10, 12:21, 23:32, 34:43, 45:52) the sign bit of each real and
imaginary part is set with probability 1/2, yielding `±1 ± im`. Columns 11, 22,
33 and 44 are then set to one (pilots) and columns 53:64 to zero (unused).
"""
function my_OfdmSym(n::Integer = 100000)
    one_bits = UInt64(1023) << 52   # bit pattern of 1.0 (biased exponent 1023)
    sign_bit = UInt64(1) << 63      # IEEE-754 sign bit

    # Two UInt64 rows per complex row: (real, imaginary) pairs stacked in each
    # column, which is exactly the memory layout of Complex{Float64}.
    bits = fill(one_bits, 2n, 64)
    for cols in (1:10, 12:21, 23:32, 34:43, 45:52)
        for col in cols
            for row in 1:2n
                if rand(Bool)
                    bits[row, col] |= sign_bit   # flip 1.0 to -1.0
                end
            end
        end
    end

    # Reinterpreting halves the first dimension: 2n×64 UInt64 -> n×64 complex.
    symbol = reinterpret(Complex{Float64}, bits)
    symbol[:, [11, 22, 33, 44]] .= 1.0   # pilots
    symbol[:, 53:64] .= 0.0              # unused carriers
    return symbol
end

Upvotes: 0

IainDunning
IainDunning

Reputation: 11654

First of all, I timed it with n = 100000 (the number of symbols):

# Call once before measuring so that compilation is not included in the timings.
OfdmSym()
for trial in 1:5
    @time OfdmSym()
end

and it's pretty quick as it is:

elapsed time: 3.235866305 seconds (1278393328 bytes allocated, 15.18% gc time)
elapsed time: 3.147812323 seconds (1278393328 bytes allocated, 14.89% gc time)
elapsed time: 3.144739194 seconds (1278393328 bytes allocated, 14.68% gc time)
elapsed time: 3.118775273 seconds (1278393328 bytes allocated, 14.79% gc time)
elapsed time: 3.137765971 seconds (1278393328 bytes allocated, 14.85% gc time)

But I rewrote it using for loops to avoid the slicing:

"""
    OfdmSym2(n::Integer = 100000)

Generate `n` random OFDM symbols with plain loops (no slicing or concatenation)
and return them as an `n`×64 `Matrix{Complex{Float64}}`, one symbol per row.

The layout matches `OfdmSym`: data carriers drawn uniformly from `±1 ± im` in
columns 1:10, 12:21, 23:32, 34:43 and 45:52; pilots equal to one in columns 11,
22, 33 and 44; columns 53:64 left at zero.
"""
function OfdmSym2(n::Integer = 100000)
    symbol = zeros(Complex{Float64}, n, 64)
    s = Complex{Float64}[-1-im, -1+im, 1-im, 1+im]
    pilotcols = (11, 22, 33, 44)

    # The 48 data carriers plus 4 pilots occupy columns 1:52. Stopping at
    # column 48 and overwriting the pilot columns afterwards would leave only
    # 44 data carriers and four extra zero columns.
    for j in 1:52
        if j in pilotcols
            symbol[:, j] .= one(Complex{Float64})
        else
            # Arrays are column-major, so the row index runs in the inner loop.
            for i in 1:n
                symbol[i, j] = rand(s)
            end
        end
    end
    return symbol
end

# Call once before measuring so that compilation is not included in the timings.
OfdmSym2()
for trial in 1:5
    @time OfdmSym2()
end

which is 20x faster

elapsed time: 0.159715932 seconds (102400256 bytes allocated, 12.80% gc time)
elapsed time: 0.159113184 seconds (102400256 bytes allocated, 14.75% gc time)
elapsed time: 0.158200345 seconds (102400256 bytes allocated, 14.82% gc time)
elapsed time: 0.158469032 seconds (102400256 bytes allocated, 15.00% gc time)
elapsed time: 0.157919113 seconds (102400256 bytes allocated, 14.86% gc time)

If you look at the profiler (@profile) you'll see that most of the time is spent generating random numbers, as you'd expect, as everything else is just moving numbers around.

Upvotes: 3

Related Questions