Reputation: 1245
I've seen multiple questions addressing memory allocation in Julia in general, however none of these examples helped me. I provide a minimal example that shall illustrate my problem. I implemented a finite volume solver that computes the solution of an advection equation. Long story short here the (self contained) code:
function dummyexample()
nx = 100
Δx = 1.0/nx
x = range(Δx/2.0, length=nx, step=Δx)
ρ = sin.(2π*x)
for i=1:floor(1.0/Δx / 0.5)
shu_osher_step!(ρ) # This part is executed several times
end
println(sum(Δx*abs.(ρ .- sin.(2π*x))))
end
function shu_osher_step!(ρ::AbstractArray)
ρ₁ = euler_step(ρ) # array allocation
ρ₂ = 3.0/4.0*ρ .+ 1.0/4.0*euler_step(ρ₁) # array allocation
ρ .= 1.0/3.0*ρ .+ 2.0/3.0*euler_step(ρ₂) # array allocation
end
function euler_step(ρ::AbstractArray)
return ρ .+ 0.5*rhs(ρ)
end
function rhs(ρ::AbstractArray)
ρₗ = circshift(ρ,+1) # array allocation
ρᵣ = circshift(ρ,-1) # array allocation
Δρₗ = ρ.-ρₗ # array allocation
Δρᵣ = ρᵣ .-ρ # array allocation
vᵣ = ρ .+ 1.0/2.0 .* H(Δρₗ,Δρᵣ) # array allocation
return -(vᵣ .- circshift(vᵣ,+1)) # array allocation
end
function H(Δρₗ::AbstractArray,Δρᵣ::AbstractArray)
σ = Δρₗ ./ Δρᵣ
σ̃ = max.(abs.(σ),1e-12) .* (2.0 .* (σ .>= 0.0) .- 1.0)
for i=1:100
if isnan(σ̃[i])
σ̃[i] = 1e-12
end
end
return Δρₗ .* (2.0/3.0*(1.0 ./ σ̃) .+ 1.0/3.0)
end
My problem is, that deep down in the call tree the function rhs
allocates several arrays in every iteration of the most upper time loop. These arrays are temporary and I do not like the fact that they have to be reallocated every iteration. Here the output from @time
:
julia> include("dummyexample.jl");
julia> @time dummyexample()
8.780349744014917e-5 # <- just to check that the error is almost zero
0.362833 seconds (627.38 k allocations: 39.275 MiB, 1.95% gc time)
Now in the real code, there is actually a struct p
passed down the whole calltree that contains attributes which I hardcoded here (basically every of the explicitly stated numbers would be referenced by p.n
, etc.)
I could probably also pass down preallocated arrays like but that seems to get messy and I would have to change that every time I want to do extra computations.
Global arrays are discouraged in the Julia documentation but wouldn't that do the trick here? Are there any other obvious things I am missing? I am considering Julia 1.0.
Upvotes: 2
Views: 1071
Reputation: 20980
Passing down preallocated arrays, as you say in the last paragraph, is exactly the right thing in this kind of situation. Additional to that, I would devectorize the code into a manual loop containing a stencil and more indexing math instead of circshift
.
Applying both ideas results in the following:
function dummyexample()
nx = 100
Δx = 1.0 / nx
steps = 2 ÷ Δx
x = range(Δx ÷ 2, length = nx, step = Δx)
ρ = sin.(2π .* x)
run!(ρ, steps)
println(sum(@. Δx * abs(ρ - sin(2π * x))))
end
function run!(ρ, steps)
ρ₁, ρ₂, v = similar(ρ), similar(ρ), similar(ρ)
for i = 1:steps
shu_osher_step!(ρ₁, ρ₂, v, ρ)
end
return ρ
end
function shu_osher_step!(ρ₁, ρ₂, v, ρ)
euler_step!(ρ₁, v, ρ)
ρ₂ .= 3.0/4.0 .* ρ .+ 1.0/4.0 .* euler_step!(ρ₂, v, ρ₁)
ρ .= 1.0/3.0 .* ρ .+ 2.0/3.0 .* euler_step!(ρ, v, ρ₂)
end
function euler_step!(ρₒ, v, ρ)
cycle(i) = mod(i - 1, length(ρ)) + 1
# two steps of calculating v fused into one -- could be replaced by
# an extra loop for v.
for I in 1:2:size(ρ, 1)
v[I] = rhs(ρ[cycle(I-1)], ρ[I], ρ[cycle(I+1)])
v[cycle(I+1)] = rhs(ρ[cycle(I)], ρ[I+1], ρ[cycle(I+2)])
ρₒ[I] += 0.5 * (v[cycle(I+1)] - v[I])
end
return ρₒ
end
function rhs(ρₗ, ρᵢ, ρᵣ)
Δρₗ = ρᵢ - ρₗ
Δρᵣ = ρᵣ - ρᵢ
return ρᵢ + 1/2 * H(Δρₗ, Δρᵣ)
end
function H(Δρₗ, Δρᵣ)
σ = Δρₗ / Δρᵣ
σ̃ = max(abs(σ), 1e-12) * (2.0 * (σ >= 0.0) - 1.0)
isnan(σ̃) && (σ̃ = 1e-12)
return Δρₗ * (2.0 / 3.0 * (1.0 / σ̃) + 1.0 / 3.0)
end
The above might still contain some logic errors due to my lack of domain knowledge (dummyexample()
prints 0.02984422033942575
), but you see the pattern. And it benchmarks well:
julia> @benchmark run!($ρ, $steps)
BenchmarkTools.Trial:
memory estimate: 699.13 KiB
allocs estimate: 799
--------------
minimum time: 3.024 ms (0.00% GC)
median time: 3.164 ms (0.00% GC)
mean time: 3.760 ms (1.69% GC)
maximum time: 57.105 ms (94.41% GC)
--------------
samples: 1327
evals/sample: 1
Upvotes: 3