From 50cf879cab9ef8a1d629b33239ced49aeb1d166f Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 5 Aug 2024 14:47:09 +0200 Subject: [PATCH] Add a preference to disable Polyester (#2029) * Add preference to disable Polyester usage Co-authored-by: Hendrik Ranocha * Apply suggestions from code review Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com> --------- Co-authored-by: Hendrik Ranocha Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com> --- src/Trixi.jl | 1 + src/auxiliary/auxiliary.jl | 51 ++++++++++++++++------------------- src/auxiliary/math.jl | 15 +++++++++++ src/callbacks_step/summary.jl | 3 +++ src/solvers/dg.jl | 2 +- 5 files changed, 43 insertions(+), 29 deletions(-) diff --git a/src/Trixi.jl b/src/Trixi.jl index 1a509ed92d..23a8cfe1d0 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -80,6 +80,7 @@ using Preferences: @load_preference, set_preferences! const _PREFERENCE_SQRT = @load_preference("sqrt", "sqrt_Trixi_NaN") const _PREFERENCE_LOG = @load_preference("log", "log_Trixi_NaN") +const _PREFERENCE_POLYESTER = @load_preference("polyester", true) # finite difference SBP operators using SummationByPartsOperators: AbstractDerivativeOperator, diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index 6259e93673..97263405d2 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -204,36 +204,31 @@ Some discussion can be found at [https://discourse.julialang.org/t/overhead-of-t and [https://discourse.julialang.org/t/threads-threads-with-one-thread-how-to-remove-the-overhead/58435](https://discourse.julialang.org/t/threads-threads-with-one-thread-how-to-remove-the-overhead/58435). """ macro threaded(expr) - # Use `esc(quote ... 
end)` for nested macro calls as suggested in - # https://github.com/JuliaLang/julia/issues/23221 - # - # The following code is a simple version using only `Threads.@threads` from the - # standard library with an additional check whether only a single thread is used - # to reduce some overhead (and allocations) for serial execution. - # - # return esc(quote - # let - # if Threads.nthreads() == 1 - # $(expr) - # else - # Threads.@threads $(expr) - # end - # end - # end) - # - # However, the code below using `@batch` from Polyester.jl is more efficient, - # since this packages provides threads with less overhead. Since it is written - # by Chris Elrod, the author of LoopVectorization.jl, we expect this package - # to provide the most efficient and useful implementation of threads (as we use - # them) available in Julia. # !!! danger "Heisenbug" # Look at the comments for `wrap_array` when considering to change this macro. - - # By using `Trixi.@batch` we allow users of Trixi.jl to use `@threaded` without having - # Polyester.jl in their namespace. - return esc(quote - Trixi.@batch $(expr) - end) + expr = if _PREFERENCE_POLYESTER + # Currently, using `@batch` from Polyester.jl is more efficient: it + # bypasses the Julia task scheduler and provides parallelization with less overhead. + quote + $Trixi.@batch $(expr) + end + else + # The following code is a simple version using only `Threads.@threads` from the + # standard library with an additional check whether only a single thread is used + # to reduce some overhead (and allocations) for serial execution. + quote + let + if $Threads.nthreads() == 1 + $(expr) + else + $Threads.@threads :static $(expr) + end + end + end + end + # Use `esc(quote ... 
end)` for nested macro calls as suggested in + # https://github.com/JuliaLang/julia/issues/23221 + return esc(expr) end """ diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl index 9e3aaa181b..0bd5ad438f 100644 --- a/src/auxiliary/math.jl +++ b/src/auxiliary/math.jl @@ -7,6 +7,21 @@ const TRIXI_UUID = UUID("a7f1ee26-1774-49b1-8366-f1abc58fbfcb") +""" + Trixi.set_polyester!(toggle::Bool; force = true) + +Toggle the usage of [Polyester.jl](https://github.com/JuliaSIMD/Polyester.jl) for multithreading. +By default, Polyester.jl is enabled, but it can +be useful for performance comparisons to switch to the Julia core backend. + +This does not fully disable Polyester.jl, +but only its use as part of Trixi.jl's `@threaded` macro. +""" +function set_polyester!(toggle::Bool; force = true) + set_preferences!(TRIXI_UUID, "polyester" => toggle, force = force) + @info "Please restart Julia and reload Trixi.jl for the `polyester` change to take effect" +end + """ Trixi.set_sqrt_type(type; force = true) diff --git a/src/callbacks_step/summary.jl b/src/callbacks_step/summary.jl index 21c7fc780a..465cc10a31 100644 --- a/src/callbacks_step/summary.jl +++ b/src/callbacks_step/summary.jl @@ -207,6 +207,9 @@ function initialize_summary_callback(cb::DiscreteCallback, u, t, integrator; # technical details setup = Pair{String, Any}["#threads" => Threads.nthreads()] + if !_PREFERENCE_POLYESTER + push!(setup, "Polyester" => "disabled") + end if mpi_isparallel() push!(setup, "#MPI ranks" => mpi_nranks()) diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index fb4c8f182e..628e39e6a8 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -629,7 +629,7 @@ end # since LoopVectorization does not support `ForwardDiff.Dual`s. Hence, we use # optimized `PtrArray`s whenever possible and fall back to plain `Array`s # otherwise. 
- if LoopVectorization.check_args(u_ode) + if _PREFERENCE_POLYESTER && LoopVectorization.check_args(u_ode) # This version using `PtrArray`s from StrideArrays.jl is very fast and # does not result in allocations. #