From 59aedd161063182e2880a1773790442c545ac9e9 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 18 Nov 2020 14:04:50 -0500 Subject: [PATCH 1/2] threads: avoid deadlock from recursive lock acquire Finalizers can't safely acquire many essential locks (such as the iolock, to cleanup libuv objects) if they are run inside another lock. Therefore, inhibit all finalizers on the thread until all locks are released (previously, this was only true for our internal locks). --- NEWS.md | 3 ++- base/gcutils.jl | 10 ++++++++ base/lock.jl | 25 +++++++++++++++++--- base/locks-mt.jl | 11 ++++++++- src/gc.c | 32 +++++++++++++++++++++---- src/julia.h | 3 --- src/locks.h | 9 ++++--- src/rtutils.c | 12 ++++++---- src/task.c | 17 ++++++++++--- stdlib/Distributed/src/messages.jl | 5 ++-- test/core.jl | 10 ++++---- test/misc.jl | 38 +++++++++++++++++++++++++++--- 12 files changed, 142 insertions(+), 33 deletions(-) diff --git a/NEWS.md b/NEWS.md index 487664d821eb2..ca2d3cb4fc31e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -72,6 +72,8 @@ Command-line option changes Multi-threading changes ----------------------- +* Locks now automatically inhibit finalizers from running, to avoid deadlock ([#TBD]). +* New function `Base.Threads.foreach(f, channel::Channel)` for multithreaded `Channel` consumption ([#34543]). Build system changes -------------------- @@ -85,7 +87,6 @@ New library functions --------------------- * New function `Base.kron!` and corresponding overloads for various matrix types for performing Kronecker product in-place ([#31069]). -* New function `Base.Threads.foreach(f, channel::Channel)` for multithreaded `Channel` consumption ([#34543]). * New function `Base.readeach(io, T)` for iteratively performing `read(io, T)` ([#36150]). * `Iterators.map` is added. It provides another syntax `Iterators.map(f, iterators...)` for writing `(f(args...) for args in zip(iterators...))`, i.e. a lazy `map` ([#34352]). diff --git a/base/gcutils.jl b/base/gcutils.jl index 51e3943877444..82bdf74c9a4d4 100644 --- a/base/gcutils.jl +++ b/base/gcutils.jl @@ -106,6 +106,16 @@ Control whether garbage collection is enabled using a boolean argument (`true` f """ enable(on::Bool) = ccall(:jl_gc_enable, Int32, (Int32,), on) != 0 +""" + GC.enable_finalizers(on::Bool) + +Increment or decrement the counter that controls the running of finalizers on +the current Task. Finalizers will only run when the counter is at zero. (Set +`true` for enabling, `false` for disabling). They may still run concurrently on +another Task or thread. +""" +enable_finalizers(on::Bool) = ccall(:jl_gc_enable_finalizers, Cvoid, (Ptr{Cvoid}, Int32,), C_NULL, on) + """ GC.@preserve x1 x2 ... xn expr diff --git a/base/lock.jl b/base/lock.jl index 7033fdd80cff9..7a8e0eaef878e 100644 --- a/base/lock.jl +++ b/base/lock.jl @@ -6,9 +6,24 @@ const ThreadSynchronizer = GenericCondition{Threads.SpinLock} """ ReentrantLock() -Creates a re-entrant lock for synchronizing [`Task`](@ref)s. -The same task can acquire the lock as many times as required. -Each [`lock`](@ref) must be matched with an [`unlock`](@ref). +Creates a re-entrant lock for synchronizing [`Task`](@ref)s. The same task can +acquire the lock as many times as required. Each [`lock`](@ref) must be matched +with an [`unlock`](@ref). + +Calling 'lock' will also inhibit running of finalizers on that thread until the +corresponding 'unlock'. Use of the standard lock pattern illustrated below +should naturally be supported, but beware of inverting the try/lock order or +missing the try block entirely (e.g. attempting to return with the lock still +held): + +``` +lock(l) +try + +finally + unlock(l) +end +``` """ mutable struct ReentrantLock <: AbstractLock locked_by::Union{Task, Nothing} @@ -50,6 +65,7 @@ function trylock(rl::ReentrantLock) if rl.reentrancy_cnt == 0 rl.locked_by = t rl.reentrancy_cnt = 1 + GC.enable_finalizers(false) got = true else got = false @@ -77,6 +93,7 @@ function lock(rl::ReentrantLock) if rl.reentrancy_cnt == 0 rl.locked_by = t rl.reentrancy_cnt = 1 + GC.enable_finalizers(false) break end try @@ -118,6 +135,7 @@ function unlock(rl::ReentrantLock) rethrow() end end + GC.enable_finalizers(true) unlock(rl.cond_wait) end return @@ -139,6 +157,7 @@ function unlockall(rl::ReentrantLock) rethrow() end end + GC.enable_finalizers(true) unlock(rl.cond_wait) return n end diff --git a/base/locks-mt.jl b/base/locks-mt.jl index 49c7d2c0f9011..6a3b68016cb81 100644 --- a/base/locks-mt.jl +++ b/base/locks-mt.jl @@ -61,10 +61,12 @@ Base.assert_havelock(l::SpinLock) = islocked(l) ? nothing : Base.concurrency_vio function lock(l::SpinLock) while true if _get(l) == 0 + GC.enable_finalizers(false) p = _xchg!(l, 1) if p == 0 return end + GC.enable_finalizers(true) end ccall(:jl_cpu_pause, Cvoid, ()) # Temporary solution before we have gc transition support in codegen. @@ -74,13 +76,20 @@ end function trylock(l::SpinLock) if _get(l) == 0 - return _xchg!(l, 1) == 0 + GC.enable_finalizers(false) + p = _xchg!(l, 1) + if p == 0 + return true + end + GC.enable_finalizers(true) end return false end function unlock(l::SpinLock) + _get(l) == 0 && error("unlock count must match lock count") _set!(l, 0) + GC.enable_finalizers(true) ccall(:jl_cpu_wake, Cvoid, ()) return end diff --git a/src/gc.c b/src/gc.c index 53d4641490f5e..a7293df548394 100644 --- a/src/gc.c +++ b/src/gc.c @@ -280,7 +280,7 @@ static void run_finalizer(jl_ptls_t ptls, jl_value_t *o, jl_value_t *ff) jl_printf((JL_STREAM*)STDERR_FILENO, "error in running finalizer: "); jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception()); jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); - jlbacktrace(); // writen to STDERR_FILENO + jlbacktrace(); // written to STDERR_FILENO } } @@ -392,12 +392,36 @@ static void run_finalizers(jl_ptls_t ptls) arraylist_free(&copied_list); } +JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls) +{ + if (ptls == NULL) + ptls = jl_get_ptls_states(); + return ptls->finalizers_inhibited; +} + JL_DLLEXPORT void jl_gc_enable_finalizers(jl_ptls_t ptls, int on) { + if (ptls == NULL) + ptls = jl_get_ptls_states(); int old_val = ptls->finalizers_inhibited; int new_val = old_val + (on ? -1 : 1); + if (new_val < 0) { + JL_TRY { + jl_error(""); // get a backtrace + } + JL_CATCH { + jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n"); + // Only print the backtrace once, to avoid spamming the logs + static int backtrace_printed = 0; + if (backtrace_printed == 0) { + backtrace_printed = 1; + jlbacktrace(); // written to STDERR_FILENO + } + } + return; + } ptls->finalizers_inhibited = new_val; - if (!new_val && old_val && !ptls->in_finalizer) { + if (!new_val && old_val && !ptls->in_finalizer && ptls->locks.len == 0) { ptls->in_finalizer = 1; run_finalizers(ptls); ptls->in_finalizer = 0; @@ -1581,7 +1605,7 @@ STATIC_INLINE uintptr_t gc_read_stack(void *_addr, uintptr_t offset, JL_NORETURN NOINLINE void gc_assert_datatype_fail(jl_ptls_t ptls, jl_datatype_t *vt, jl_gc_mark_sp_t sp) { - jl_printf(JL_STDOUT, "GC error (probable corruption) :\n"); + jl_safe_printf("GC error (probable corruption) :\n"); gc_debug_print_status(); jl_(vt); gc_debug_critical_error(); @@ -3192,7 +3216,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) // Only disable finalizers on current thread // Doing this on all threads is racy (it's impossible to check // or wait for finalizers on other threads without dead lock). - if (!ptls->finalizers_inhibited) { + if (!ptls->finalizers_inhibited && ptls->locks.len == 0) { int8_t was_in_finalizer = ptls->in_finalizer; ptls->in_finalizer = 1; run_finalizers(ptls); diff --git a/src/julia.h b/src/julia.h index ec02eb8f32334..612b926d759c2 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1730,7 +1730,6 @@ typedef struct _jl_handler_t { int8_t gc_state; size_t locks_len; sig_atomic_t defer_signal; - int finalizers_inhibited; jl_timing_block_t *timing_stack; size_t world_age; } jl_handler_t; @@ -1753,8 +1752,6 @@ typedef struct _jl_task_t { int16_t tid; // multiqueue priority int16_t prio; - // current world age - size_t world_age; // saved exception stack jl_excstack_t *excstack; // current exception handler diff --git a/src/locks.h b/src/locks.h index af5f33a7ea5ea..b02fc8c5011e5 100644 --- a/src/locks.h +++ b/src/locks.h @@ -84,11 +84,9 @@ static inline void jl_lock_frame_pop(void) static inline void jl_mutex_lock(jl_mutex_t *lock) { - jl_ptls_t ptls = jl_get_ptls_states(); JL_SIGATOMIC_BEGIN(); jl_mutex_wait(lock, 1); jl_lock_frame_push(lock); - jl_gc_enable_finalizers(ptls, 0); } static inline int jl_mutex_trylock_nogc(jl_mutex_t *lock) @@ -111,10 +109,8 @@ static inline int jl_mutex_trylock(jl_mutex_t *lock) { int got = jl_mutex_trylock_nogc(lock); if (got) { - jl_ptls_t ptls = jl_get_ptls_states(); JL_SIGATOMIC_BEGIN(); jl_lock_frame_push(lock); - jl_gc_enable_finalizers(ptls, 0); } return got; } @@ -134,9 +130,12 @@ static inline void jl_mutex_unlock(jl_mutex_t *lock) { jl_ptls_t ptls = jl_get_ptls_states(); jl_mutex_unlock_nogc(lock); - jl_gc_enable_finalizers(ptls, 1); jl_lock_frame_pop(); JL_SIGATOMIC_END(); + if (ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) { + ptls->finalizers_inhibited = 1; + jl_gc_enable_finalizers(ptls, 1); // call run_finalizers (may GC) + } } static inline void jl_mutex_init(jl_mutex_t *lock) JL_NOTSAFEPOINT diff --git a/src/rtutils.c b/src/rtutils.c index 03fe6f461d564..9e755784efbec 100644 --- a/src/rtutils.c +++ b/src/rtutils.c @@ -218,7 +218,6 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh) eh->gc_state = ptls->gc_state; eh->locks_len = ptls->locks.len; eh->defer_signal = ptls->defer_signal; - eh->finalizers_inhibited = ptls->finalizers_inhibited; eh->world_age = ptls->world_age; current_task->eh = eh; #ifdef ENABLE_TIMINGS @@ -249,14 +248,14 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh) current_task->eh = eh->prev; ptls->pgcstack = eh->gcstack; small_arraylist_t *locks = &ptls->locks; - if (locks->len > eh->locks_len) { - for (size_t i = locks->len;i > eh->locks_len;i--) + int unlocks = locks->len > eh->locks_len; + if (unlocks) { + for (size_t i = locks->len; i > eh->locks_len; i--) jl_mutex_unlock_nogc((jl_mutex_t*)locks->items[i - 1]); locks->len = eh->locks_len; } ptls->world_age = eh->world_age; ptls->defer_signal = eh->defer_signal; - ptls->finalizers_inhibited = eh->finalizers_inhibited; if (old_gc_state != eh->gc_state) { jl_atomic_store_release(&ptls->gc_state, eh->gc_state); if (old_gc_state) { @@ -266,6 +265,11 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh) if (old_defer_signal && !eh->defer_signal) { jl_sigint_safepoint(ptls); } + if (unlocks && eh->locks_len == 0 && ptls->finalizers_inhibited == 0) { + // call run_finalizers + ptls->finalizers_inhibited = 1; + jl_gc_enable_finalizers(ptls, 1); + } } JL_DLLEXPORT void jl_pop_handler(int n) diff --git a/src/task.c b/src/task.c index 3d7b40191c3d6..1f6537e71038d 100644 --- a/src/task.c +++ b/src/task.c @@ -393,9 +393,8 @@ static void ctx_switch(jl_ptls_t ptls) } // set up global state for new task - lastt->world_age = ptls->world_age; ptls->pgcstack = t->gcstack; - ptls->world_age = t->world_age; + ptls->world_age = 0; t->gcstack = NULL; #ifdef MIGRATE_TASKS ptls->previous_task = lastt; @@ -510,8 +509,14 @@ JL_DLLEXPORT void jl_switch(void) else if (t->tid != ptls->tid) { jl_error("cannot switch to task running on another thread"); } + + // Store old values on the stack and reset sig_atomic_t defer_signal = ptls->defer_signal; int8_t gc_state = jl_gc_unsafe_enter(ptls); + size_t world_age = ptls->world_age; + int finalizers_inhibited = ptls->finalizers_inhibited; + ptls->world_age = 0; + ptls->finalizers_inhibited = 0; #ifdef ENABLE_TIMINGS jl_timing_block_t *blk = ct->timing_stack; @@ -533,7 +538,12 @@ JL_DLLEXPORT void jl_switch(void) assert(ptls == refetch_ptls()); #endif - ct = ptls->current_task; + // Pop old values back off the stack + assert(ct == ptls->current_task && + 0 == ptls->world_age && + 0 == ptls->finalizers_inhibited); + ptls->world_age = world_age; + ptls->finalizers_inhibited = finalizers_inhibited; #ifdef ENABLE_TIMINGS assert(blk == ct->timing_stack); @@ -796,6 +806,7 @@ STATIC_OR_JS void NOINLINE JL_NORETURN start_task(void) jl_ptls_t ptls = jl_get_ptls_states(); jl_task_t *t = ptls->current_task; jl_value_t *res; + assert(ptls->finalizers_inhibited == 0); #ifdef MIGRATE_TASKS jl_task_t *pt = ptls->previous_task; diff --git a/stdlib/Distributed/src/messages.jl b/stdlib/Distributed/src/messages.jl index 5d590ac6a4e27..1424855ec1995 100644 --- a/stdlib/Distributed/src/messages.jl +++ b/stdlib/Distributed/src/messages.jl @@ -135,6 +135,7 @@ function flush_gc_msgs(w::Worker) end # del_msgs gets populated by finalizers, so be very careful here about ordering of allocations + # XXX: threading requires this to be atomic new_array = Any[] msgs = w.del_msgs w.del_msgs = new_array @@ -166,7 +167,7 @@ function send_msg_(w::Worker, header, msg, now::Bool) wait(w.initialized) end io = w.w_stream - lock(io.lock) + lock(io) try reset_state(w.w_serializer) serialize_hdr_raw(io, header) @@ -179,7 +180,7 @@ function send_msg_(w::Worker, header, msg, now::Bool) flush(io) end finally - unlock(io.lock) + unlock(io) end end diff --git a/test/core.jl b/test/core.jl index bb457e44d89f3..28af6b88ceb4d 100644 --- a/test/core.jl +++ b/test/core.jl @@ -6,7 +6,6 @@ using Random, SparseArrays, InteractiveUtils const Bottom = Union{} - # For curmod_* include("testenv.jl") @@ -2081,7 +2080,7 @@ mutable struct A6142 <: AbstractMatrix{Float64}; end +(x::A6142, y::AbstractRange) = "AbstractRange method called" #16324 ambiguity # issue #6175 -function g6175(); print(""); (); end +function g6175(); GC.safepoint(); (); end g6175(i::Real, I...) = g6175(I...) g6175(i, I...) = tuple(length(i), g6175(I...)...) @test g6175(1:5) === (5,) @@ -2221,7 +2220,7 @@ day_in(obj6387) function segfault6793(;gamma=1) A = 1 B = 1 - print() + GC.safepoint() return -gamma nothing @@ -3327,7 +3326,7 @@ function f11065() if i == 1 z = "z is defined" elseif i == 2 - print(z) + print(z) # z is undefined end end end @@ -4244,7 +4243,10 @@ end end # disable GC to make sure no collection/promotion happens # when we are constructing the objects +get_finalizers_inhibited() = ccall(:jl_gc_get_finalizers_inhibited, Int32, (Ptr{Cvoid},), C_NULL) let gc_enabled13995 = GC.enable(false) + @assert gc_enabled13995 + @assert get_finalizers_inhibited() == 0 finalized13995 = [false, false, false, false] create_dead_object13995(finalized13995) GC.enable(true) diff --git a/test/misc.jl b/test/misc.jl index 70af5595b1074..41b92785fa5b6 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -100,9 +100,12 @@ let """ end +# Debugging tool: return the current state of the enable_finalizers counter. +get_finalizers_inhibited() = ccall(:jl_gc_get_finalizers_inhibited, Int32, (Ptr{Cvoid},), C_NULL) + # lock / unlock let l = ReentrantLock() - lock(l) + @test lock(l) === nothing @test islocked(l) success = Ref(false) @test trylock(l) do @@ -115,14 +118,43 @@ let l = ReentrantLock() @test success[] t = @async begin @test trylock(l) do - @test false + error("unreachable") end === false end + @test get_finalizers_inhibited() == 1 Base.wait(t) - unlock(l) + @test get_finalizers_inhibited() == 1 + @test unlock(l) === nothing + @test get_finalizers_inhibited() == 0 @test_throws ErrorException unlock(l) end +for l in (Threads.SpinLock(), ReentrantLock()) + @test get_finalizers_inhibited() == 0 + @test lock(get_finalizers_inhibited, l) == 1 + @test get_finalizers_inhibited() == 0 + try + GC.enable_finalizers(false) + GC.enable_finalizers(false) + @test get_finalizers_inhibited() == 2 + GC.enable_finalizers(true) + @test get_finalizers_inhibited() == 1 + finally + @test get_finalizers_inhibited() == 1 + GC.enable_finalizers(false) + @test get_finalizers_inhibited() == 2 + end + @test get_finalizers_inhibited() == 2 + GC.enable_finalizers(true) + @test get_finalizers_inhibited() == 1 + GC.enable_finalizers(true) + @test get_finalizers_inhibited() == 0 + @test_warn "WARNING: GC finalizers already enabled on this thread." GC.enable_finalizers(true) + + @test lock(l) === nothing + @test try unlock(l) finally end === nothing +end + # task switching @noinline function f6597(c) From 72bebdf2715f94bad52c701cfb867744a5b070d6 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 30 Nov 2020 18:28:22 -0500 Subject: [PATCH 2/2] move timing_stack field from jl_task_t onto the stack also --- src/init.c | 3 --- src/julia.h | 2 -- src/julia_threads.h | 1 + src/rtutils.c | 2 +- src/task.c | 11 +++++------ src/timing.c | 18 +++++++++--------- src/timing.h | 7 ++++--- 7 files changed, 20 insertions(+), 24 deletions(-) diff --git a/src/init.c b/src/init.c index 43905933f391a..e69c43837787d 100644 --- a/src/init.c +++ b/src/init.c @@ -745,9 +745,6 @@ void _julia_init(JL_IMAGE_SEARCH rel) jl_init_tasks(); jl_init_root_task(stack_lo, stack_hi); -#ifdef ENABLE_TIMINGS - jl_root_task->timing_stack = jl_root_timing; -#endif jl_init_common_symbols(); jl_init_flisp(); jl_init_serializer(); diff --git a/src/julia.h b/src/julia.h index 612b926d759c2..63b58b7d3e009 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1765,8 +1765,6 @@ typedef struct _jl_task_t { // saved gc stack top for context switches jl_gcframe_t *gcstack; - - jl_timing_block_t *timing_stack; } jl_task_t; #define JL_TASK_STATE_RUNNABLE 0 diff --git a/src/julia_threads.h b/src/julia_threads.h index 1e3b762ef385f..1eaf0e0035f52 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -207,6 +207,7 @@ struct _jl_tls_states_t { struct _jl_task_t *previous_task; #endif struct _jl_task_t *root_task; + struct _jl_timing_block_t *timing_stack; void *stackbase; size_t stacksize; jl_ucontext_t base_ctx; // base context of stack diff --git a/src/rtutils.c b/src/rtutils.c index 9e755784efbec..5ac68510ae392 100644 --- a/src/rtutils.c +++ b/src/rtutils.c @@ -221,7 +221,7 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh) eh->world_age = ptls->world_age; current_task->eh = eh; #ifdef ENABLE_TIMINGS - eh->timing_stack = current_task->timing_stack; + eh->timing_stack = ptls->timing_stack; #endif } diff --git a/src/task.c b/src/task.c index 1f6537e71038d..ce54b3882c284 100644 --- a/src/task.c +++ b/src/task.c @@ -519,9 +519,10 @@ JL_DLLEXPORT void jl_switch(void) ptls->finalizers_inhibited = 0; #ifdef ENABLE_TIMINGS - jl_timing_block_t *blk = ct->timing_stack; + jl_timing_block_t *blk = ptls->timing_stack; if (blk) jl_timing_block_stop(blk); + ptls->timing_stack = NULL; #endif ctx_switch(ptls); @@ -546,7 +547,8 @@ JL_DLLEXPORT void jl_switch(void) ptls->finalizers_inhibited = finalizers_inhibited; #ifdef ENABLE_TIMINGS - assert(blk == ct->timing_stack); + assert(ptls->timing_stack == NULL); + ptls->timing_stack = blk; if (blk) jl_timing_block_start(blk); #else @@ -600,7 +602,7 @@ static void JL_NORETURN throw_internal(jl_value_t *exception JL_MAYBE_UNROOTED) jl_handler_t *eh = ptls->current_task->eh; if (eh != NULL) { #ifdef ENABLE_TIMINGS - jl_timing_block_t *cur_block = ptls->current_task->timing_stack; + jl_timing_block_t *cur_block = ptls->timing_stack; while (cur_block && eh->timing_stack != cur_block) { cur_block = jl_pop_timing_block(cur_block); } @@ -701,9 +703,6 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->started = 0; t->prio = -1; t->tid = -1; -#ifdef ENABLE_TIMINGS - t->timing_stack = jl_root_timing; -#endif #if defined(JL_DEBUG_BUILD) if (!t->copy_stack) diff --git a/src/timing.c b/src/timing.c index 70fb8df213796..12f47bbfa0086 100644 --- a/src/timing.c +++ b/src/timing.c @@ -17,7 +17,7 @@ extern "C" { #error Timings are not supported on your compiler #endif -jl_timing_block_t *jl_root_timing; +static uint64_t t0; uint64_t jl_timing_data[(int)JL_TIMING_LAST] = {0}; const char *jl_timing_names[(int)JL_TIMING_LAST] = { @@ -28,32 +28,32 @@ const char *jl_timing_names[(int)JL_TIMING_LAST] = void jl_print_timings(void) { - uint64_t total_time = 0; + uint64_t total_time = cycleclock() - t0; + uint64_t root_time = total_time; for (int i = 0; i < JL_TIMING_LAST; i++) { - total_time += jl_timing_data[i]; + root_time -= jl_timing_data[i]; } + jl_timing_data[0] = root_time; for (int i = 0; i < JL_TIMING_LAST; i++) { if (jl_timing_data[i] != 0) - fprintf(stderr,"%-25s : %5.2f %% %" PRIu64 "\n", jl_timing_names[i], + fprintf(stderr, "%-25s : %5.2f %% %" PRIu64 "\n", jl_timing_names[i], 100 * (((double)jl_timing_data[i]) / total_time), jl_timing_data[i]); } } void jl_init_timing(void) { - jl_root_timing = (jl_timing_block_t*)malloc_s(sizeof(jl_timing_block_t)); - _jl_timing_block_init(jl_root_timing, JL_TIMING_ROOT); - jl_root_timing->prev = NULL; + t0 = cycleclock(); } void jl_destroy_timing(void) { - jl_timing_block_t *stack = jl_current_task ? jl_current_task->timing_stack : jl_root_timing; + jl_ptls_t ptls = jl_get_ptls_states(); + jl_timing_block_t *stack = ptls->timing_stack; while (stack) { _jl_timing_block_destroy(stack); stack = stack->prev; } - free(jl_root_timing); } jl_timing_block_t *jl_pop_timing_block(jl_timing_block_t *cur_block) diff --git a/src/timing.h b/src/timing.h index 2848f020a12c0..9a3307709a38f 100644 --- a/src/timing.h +++ b/src/timing.h @@ -23,7 +23,6 @@ extern "C" { #endif void jl_print_timings(void); jl_timing_block_t *jl_pop_timing_block(jl_timing_block_t *cur_block); -extern jl_timing_block_t *jl_root_timing; void jl_timing_block_start(jl_timing_block_t *cur_block); void jl_timing_block_stop(jl_timing_block_t *cur_block); #ifdef __cplusplus @@ -117,7 +116,8 @@ STATIC_INLINE uint64_t _jl_timing_block_init(jl_timing_block_t *block, int owner STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner) { uint64_t t = _jl_timing_block_init(block, owner); - jl_timing_block_t **prevp = jl_current_task ? &jl_current_task->timing_stack : &jl_root_timing; + jl_ptls_t ptls = jl_get_ptls_states(); + jl_timing_block_t **prevp = &ptls->timing_stack; block->prev = *prevp; if (block->prev) _jl_timing_block_stop(block->prev, t); @@ -126,9 +126,10 @@ STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner) { STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) { uint64_t t = cycleclock(); + jl_ptls_t ptls = jl_get_ptls_states(); _jl_timing_block_stop(block, t); jl_timing_data[block->owner] += block->total; - jl_timing_block_t **pcur = jl_current_task ? &jl_current_task->timing_stack : &jl_root_timing; + jl_timing_block_t **pcur = &ptls->timing_stack; assert(*pcur == block); *pcur = block->prev; if (block->prev)