Reputation: 818
With Julia 1.5.3, I have the following code (of course it's not the actual code, just a contrived example with which I could reproduce the problem):
"""
    basic()

Sum the offsets `(i, j, k)` over all `i, j, k` in `-1:1`, repeating the whole sweep
100000 times, and return the accumulated tuple.

The accumulator type is hard-coded as `Tuple{Int64,Int64,Int64}`. The offsets cancel
out over a full sweep, so the returned value is `(0, 0, 0)`.
"""
function basic()
    # Typed local: every assignment to `result` is converted to the declared tuple type.
    result::Tuple{Int64,Int64,Int64} = (0, 0, 0)
    # The outer counter is never read; it only repeats the inner sweep.
    for _ in 1:100000
        for i in -1:1, j in -1:1, k in -1:1
            result = result .+ (i, j, k)
        end
    end
    return result
end
"""
    fancy(T)

Same accumulation as `basic`, but the accumulator type is supplied by the caller as
the type object `T` (called with `Tuple{Int64,Int64,Int64}` in this example).

`T` is used both as the declared type of `result` and as a type assertion on each
offset tuple, so an offset tuple that is not already a `T` raises a `TypeError`.

NOTE(review): `T` is an untyped argument that is only used in annotations. Julia's
documented heuristics may skip specializing on such an argument, which would explain
the allocations reported for this version -- confirm with `@code_warntype`.
"""
function fancy(T)
    # Typed local: every assignment to `result` is converted to `T`.
    result::T = (0, 0, 0)
    # The outer counter is never read; it only repeats the inner sweep.
    for _ in 1:100000
        for i in -1:1, j in -1:1, k in -1:1
            # `::` binds tighter than `.+`, so the assertion applies to `(i, j, k)` only.
            result = result .+ (i, j, k)::T
        end
    end
    return result
end
# Warmup: call both functions a few times before measuring, so that the timed
# calls below are not the first calls (the first call includes compilation).
for i in 1:10
basic()
fancy(Tuple{Int64,Int64,Int64})
end
# Timed runs: `@time` reports elapsed time and allocations for the call, and the
# surrounding `println` prints the value the call returned.
println("basic:")
println(@time basic())
println("fancy:")
println(@time fancy(Tuple{Int64,Int64,Int64}))
outputs:
basic:
0.000000 seconds
(0, 0, 0)
fancy:
0.762599 seconds (10.80 M allocations: 494.385 MiB, 3.68% gc time)
(0, 0, 0)
If I write instead:
result = result .+ (i, j, k)::T
I get:
basic:
0.000000 seconds
(0, 0, 0)
fancy:
0.789512 seconds (13.50 M allocations: 576.782 MiB, 3.77% gc time)
(0, 0, 0)
Is there a way to make `basic` and `fancy` perform the same, i.e. without allocations in `fancy`?
Upvotes: 4
Views: 83
Reputation: 2862
Just change the signature of `fancy`:
"""
    fancy(::Type{T}) where T

Sum the offsets `(i, j, k)` over all `i, j, k` in `-1:1`, repeating the whole sweep
100000 times, using `T` as the accumulator type.

`T` is taken from the type argument via `::Type{T}` and bound as a static parameter of
the method. Each offset tuple is asserted to be a `T`, so a `T` that `(i, j, k)` does
not already belong to raises a `TypeError`.
"""
function fancy(::Type{T}) where T
    # Typed local: every assignment to `result` is converted to `T`.
    result::T = (0, 0, 0)
    # The outer counter is never read; it only repeats the inner sweep.
    for _ in 1:100000
        for i in -1:1, j in -1:1, k in -1:1
            # `::` binds tighter than `.+`, so the assertion applies to `(i, j, k)` only.
            result = result .+ (i, j, k)::T
        end
    end
    return result
end
# Time one call of the `fancy` method defined above and print the value it returns.
println(@time fancy(Tuple{Int64,Int64,Int64}))
Note that both functions are actually optimized down to almost a no-op. You can inspect this with `@code_native basic()`.
Upvotes: 3
Reputation: 6378
How about this?
"""
    fancy(T)

Sum the offsets `(i, j, k)` over all `i, j, k` in `-1:1`, repeating the whole sweep
100000 times, using `T` as the accumulator type.

Instead of type annotations, `T` is called as a constructor on both the initial value
and each offset tuple, so the offsets are converted to `T` rather than asserted to
already be a `T`.
"""
function fancy(T)
    # Build the starting accumulator by constructing a `T` from the zero tuple.
    result = T((0, 0, 0))
    # The outer counter is never read; it only repeats the inner sweep.
    for _ in 1:100000
        for i in -1:1, j in -1:1, k in -1:1
            result = result .+ T((i, j, k))
        end
    end
    return result
end
Upvotes: 1