Skip to content

Commit

Permalink
Merge branch 'main' into ranocha-patch-2
Browse files Browse the repository at this point in the history
  • Loading branch information
ranocha authored Aug 5, 2024
2 parents c78e364 + 50cf879 commit 8b15fd8
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 29 deletions.
1 change: 1 addition & 0 deletions src/Trixi.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ using Preferences: @load_preference, set_preferences!

# Package preferences read via `@load_preference(key, default)`; the second argument
# is the value used when the preference has not been set.
const _PREFERENCE_SQRT = @load_preference("sqrt", "sqrt_Trixi_NaN")
const _PREFERENCE_LOG = @load_preference("log", "log_Trixi_NaN")
# Whether Polyester.jl is used for multithreading (default: `true`).
# Can be changed with `set_polyester!`; Julia has to be restarted afterwards.
const _PREFERENCE_POLYESTER = @load_preference("polyester", true)

# finite difference SBP operators
using SummationByPartsOperators: AbstractDerivativeOperator,
Expand Down
51 changes: 23 additions & 28 deletions src/auxiliary/auxiliary.jl
Original file line number Diff line number Diff line change
Expand Up @@ -204,36 +204,31 @@ Some discussion can be found at [https://discourse.julialang.org/t/overhead-of-t
and [https://discourse.julialang.org/t/threads-threads-with-one-thread-how-to-remove-the-overhead/58435](https://discourse.julialang.org/t/threads-threads-with-one-thread-how-to-remove-the-overhead/58435).
"""
macro threaded(expr)
    # !!! danger "Heisenbug"
    #     Look at the comments for `wrap_array` when considering to change this macro.

    # The threading backend is chosen while the macro is expanded, based on the
    # `_PREFERENCE_POLYESTER` constant, so only one of the two variants below
    # ends up in the generated code.
    threaded_expr = if _PREFERENCE_POLYESTER
        # Currently using `@batch` from Polyester.jl is more efficient,
        # bypasses the Julia task scheduler and provides parallelization with less
        # overhead.
        # By referring to `@batch` through `Trixi` we allow users of Trixi.jl to use
        # `@threaded` without having Polyester.jl in their namespace.
        quote
            $Trixi.@batch $(expr)
        end
    else
        # The following code is a simple version using only `Threads.@threads` from
        # the standard library with an additional check whether only a single thread
        # is used to reduce some overhead (and allocations) for serial execution.
        quote
            let
                if $Threads.nthreads() == 1
                    $(expr)
                else
                    $Threads.@threads :static $(expr)
                end
            end
        end
    end

    # Use `esc(quote ... end)` for nested macro calls as suggested in
    # https://github.com/JuliaLang/julia/issues/23221
    return esc(threaded_expr)
end

"""
Expand Down
15 changes: 15 additions & 0 deletions src/auxiliary/math.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,21 @@

const TRIXI_UUID = UUID("a7f1ee26-1774-49b1-8366-f1abc58fbfcb")

"""
    Trixi.set_polyester!(toggle::Bool; force = true)

Enable (`toggle = true`) or disable (`toggle = false`) the usage of
[Polyester.jl](https://github.com/JuliaSIMD/Polyester.jl) for multithreading.
Polyester.jl is enabled by default, but switching to the Julia core backend
can be useful for performance comparisons.
Note that this does not fully disable Polyester.jl, but only its use as part of
Trixi.jl's `@threaded` macro.

The choice is stored as the `polyester` preference; Julia needs to be restarted
and Trixi.jl reloaded before a change takes effect.
"""
function set_polyester!(toggle::Bool; force = true)
    # Persist the choice; it is only read again when Trixi.jl is loaded anew.
    set_preferences!(TRIXI_UUID, "polyester" => toggle; force = force)
    @info "Please restart Julia and reload Trixi.jl for the `polyester` change to take effect"
    return nothing
end

"""
Trixi.set_sqrt_type(type; force = true)
Expand Down
3 changes: 3 additions & 0 deletions src/callbacks_step/summary.jl
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,9 @@ function initialize_summary_callback(cb::DiscreteCallback, u, t, integrator;

# technical details
setup = Pair{String, Any}["#threads" => Threads.nthreads()]
if !_PREFERENCE_POLYESTER
push!(setup, "Polyester" => "disabled")
end
if mpi_isparallel()
push!(setup,
"#MPI ranks" => mpi_nranks())
Expand Down
2 changes: 1 addition & 1 deletion src/solvers/dg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,7 @@ end
# since LoopVectorization does not support `ForwardDiff.Dual`s. Hence, we use
# optimized `PtrArray`s whenever possible and fall back to plain `Array`s
# otherwise.
if LoopVectorization.check_args(u_ode)
if _PREFERENCE_POLYESTER && LoopVectorization.check_args(u_ode)
# This version using `PtrArray`s from StrideArrays.jl is very fast and
# does not result in allocations.
#
Expand Down

0 comments on commit 8b15fd8

Please sign in to comment.