Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add notice of excessive scheduling time to time macro etc. #55103

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ New library features
content is fully written, then call `closewrite` manually to avoid
data-races. Or use the callback form of `open` to have all that handled
automatically.
* `@time` now shows if the time spent on scheduling tasks (not including sleeping/waiting) is more
than 10% of the total time, as a percentage. ([#55103])
* `@timed` now additionally returns the elapsed compilation and recompilation time ([#52889])
* `escape_string` takes additional keyword arguments `ascii=true` (to escape all
non-ASCII characters) and `fullhex=true` (to require full 4/8-digit hex numbers
Expand Down
4 changes: 4 additions & 0 deletions base/Base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,10 @@ include("weakkeydict.jl")
# ScopedValues
include("scopedvalues.jl")

# used by task.jl
const task_times_per_thread = ScopedValues.ScopedValue{Vector{UInt64}}() # time_ns always returns UInt64
const sleep_times_per_thread = ScopedValues.ScopedValue{Vector{UInt64}}()

# metaprogramming
include("meta.jl")

Expand Down
24 changes: 23 additions & 1 deletion base/task.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1098,7 +1098,18 @@ end

function try_yieldto(undo)
try
# when timing time spent in the scheduler we time the time spent in the task
# then subtract the elapsed time in `@timed` from the total time to get the scheduler time
t, ts = isassigned(task_times_per_thread) ? (time_ns(), task_times_per_thread[]) : (nothing, nothing)
ccall(:jl_switch, Cvoid, ())
if ts !== nothing
t = time_ns() - t
tid = Threads.threadid()
if tid <= length(ts)
ts[tid] += t
end
# TODO: grow the array if needed i.e. if a thread is added within the `@timed` block
end
catch
undo(ccall(:jl_get_next_task, Ref{Task}, ()))
rethrow()
Expand Down Expand Up @@ -1170,8 +1181,19 @@ end

function wait()
GC.safepoint()
W = workqueue_for(Threads.threadid())
tid = Threads.threadid()
W = workqueue_for(tid)
# optional sleep timing
t, ts = isassigned(sleep_times_per_thread) ? (time_ns(), sleep_times_per_thread[]) : (nothing, nothing)
poptask(W)
if ts !== nothing # sleep timing is enabled
t = time_ns() - t
if tid <= length(ts)
ts[tid] += t
end
# TODO: grow the array if needed i.e. if a thread is added within the `@timed` block
end

result = try_yieldto(ensure_rescheduled)
process_events()
# return when we come out of the queue
Expand Down
97 changes: 64 additions & 33 deletions base/timing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -196,17 +196,19 @@ function format_bytes(bytes; binary=true) # also used by InteractiveUtils
end
end

function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_conflicts=0, compile_time=0, recompile_time=0, newline=false;
msg::Union{String,Nothing}=nothing)
function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_conflicts=0, compile_time=0, recompile_time=0, sched_time_avg=0,
newline=false; msg::Union{String,Nothing}=nothing)
timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6)
wall_time_sched_perc = sched_time_avg / (elapsedtime / 1e9)
sched_thresh = 0.1
str = sprint() do io
if msg isa String
print(io, msg, ": ")
else
print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "")
end
print(io, timestr, " seconds")
parens = bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0 || compile_time > 0
parens = bytes != 0 || allocs != 0 || gctime > 0 || wall_time_sched_perc > sched_thresh || lock_conflicts > 0 || compile_time > 0
parens && print(io, " (")
if bytes != 0 || allocs != 0
allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), Int64(1000))
Expand All @@ -223,15 +225,21 @@ function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_confl
end
print(io, Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time")
end
if lock_conflicts > 0
if wall_time_sched_perc > sched_thresh
if bytes != 0 || allocs != 0 || gctime > 0
print(io, ", ")
end
print(io, Ryu.writefixed(Float64(100*wall_time_sched_perc), 2), "% scheduling time")
end
if lock_conflicts > 0
if bytes != 0 || allocs != 0 || gctime > 0 || wall_time_sched_perc > sched_thresh
print(io, ", ")
end
plural = lock_conflicts == 1 ? "" : "s"
print(io, lock_conflicts, " lock conflict$plural")
end
if compile_time > 0
if bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0
if bytes != 0 || allocs != 0 || gctime > 0 || wall_time_sched_perc > sched_thresh || lock_conflicts > 0
print(io, ", ")
end
print(io, Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
Expand Down Expand Up @@ -285,7 +293,9 @@ A macro to execute an expression, printing the time it took to execute, the numb
allocations, and the total number of bytes its execution caused to be allocated, before
returning the value of the expression. Any time spent garbage collecting (gc), compiling
new code, or recompiling invalidated code is shown as a percentage. Any lock conflicts
where a [`ReentrantLock`](@ref) had to wait are shown as a count.
where a [`ReentrantLock`](@ref) had to wait are shown as a count. If the time spent on
scheduling tasks (not including sleeping/waiting) is more than 10% of the total time it
is also shown as a percentage.

Optionally provide a description string to print before the time report.

Expand All @@ -309,6 +319,9 @@ See also [`@showtime`](@ref), [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@
!!! compat "Julia 1.11"
The reporting of any lock conflicts was added in Julia 1.11.

!!! compat "Julia 1.12"
The reporting of excessive scheduling time was added in Julia 1.12.

```julia-repl
julia> x = rand(10,10);

Expand Down Expand Up @@ -346,7 +359,7 @@ macro time(msg, ex)
local ret = @timed $(esc(ex))
local _msg = $(esc(msg))
local _msg_str = _msg === nothing ? _msg : string(_msg)
time_print(stdout, ret.time*1e9, ret.gcstats.allocd, ret.gcstats.total_time, gc_alloc_count(ret.gcstats), ret.lock_conflicts, ret.compile_time*1e9, ret.recompile_time*1e9, true; msg=_msg_str)
time_print(stdout, ret.time*1e9, ret.gcstats.allocd, ret.gcstats.total_time, gc_alloc_count(ret.gcstats), ret.lock_conflicts, ret.compile_time*1e9, ret.recompile_time*1e9, ret.sched_time_avg, true; msg=_msg_str)
ret.value
end
end
Expand Down Expand Up @@ -555,8 +568,9 @@ end

A macro to execute an expression, and return the value of the expression, elapsed time in seconds,
total bytes allocated, garbage collection time, an object with various memory allocation
counters, compilation time in seconds, and recompilation time in seconds. Any lock conflicts
where a [`ReentrantLock`](@ref) had to wait are shown as a count.
counters, compilation time in seconds, with recompilation time in seconds, any lock conflicts
where a [`ReentrantLock`](@ref) had to wait as a count, and the average time spent on
scheduling tasks (not including sleeping/waiting) across threads.

In some cases the system will look inside the `@timed` expression and compile some of the
called code before execution of the top-level expression begins. When that happens, some
Expand Down Expand Up @@ -596,33 +610,50 @@ julia> stats.recompile_time

!!! compat "Julia 1.11"
The `lock_conflicts`, `compile_time`, and `recompile_time` fields were added in Julia 1.11.

!!! compat "Julia 1.12"
The `sched_time_avg` field was added in Julia 1.11.
"""
macro timed(ex)
quote
Experimental.@force_compile
Threads.lock_profiling(true)
local lock_conflicts = Threads.LOCK_CONFLICT_COUNT[]
local stats = gc_num()
local elapsedtime = time_ns()
cumulative_compile_timing(true)
local compile_elapsedtimes = cumulative_compile_time_ns()
local val = @__tryfinally($(esc(ex)),
(elapsedtime = time_ns() - elapsedtime;
cumulative_compile_timing(false);
compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes;
lock_conflicts = Threads.LOCK_CONFLICT_COUNT[] - lock_conflicts;
Threads.lock_profiling(false))
)
local diff = GC_Diff(gc_num(), stats)
(
value=val,
time=elapsedtime/1e9,
bytes=diff.allocd,
gctime=diff.total_time/1e9,
gcstats=diff,
lock_conflicts=lock_conflicts,
compile_time=compile_elapsedtimes[1]/1e9,
recompile_time=compile_elapsedtimes[2]/1e9
)
ScopedValues.@with task_times_per_thread => zeros(UInt, Threads.maxthreadid()) sleep_times_per_thread => zeros(UInt, Threads.maxthreadid()) begin
Experimental.@force_compile
Threads.lock_profiling(true)
local lock_conflicts = Threads.LOCK_CONFLICT_COUNT[]
local stats = gc_num()
local elapsedtime = time_ns()
cumulative_compile_timing(true)
local compile_elapsedtimes = cumulative_compile_time_ns()
local val = @__tryfinally($(esc(ex)),
(elapsedtime = time_ns() - elapsedtime;
cumulative_compile_timing(false);
compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes;
lock_conflicts = Threads.LOCK_CONFLICT_COUNT[] - lock_conflicts;
Threads.lock_profiling(false))
)
local diff = GC_Diff(gc_num(), stats)
local sched_times = Int64[]
for i in 1:length(task_times_per_thread[])
# filter out zeros in task timers which can only happen if nothing was scheduled
if task_times_per_thread[][i] != 0
# subtract task and sleep times from global elapsed time to get scheduling time per thread
push!(sched_times, Int64(elapsedtime) - Int64(task_times_per_thread[][i]) - Int64(sleep_times_per_thread[][i]))
end
end
local sched_time_avg = isempty(sched_times) ? 0 : sum(sched_times) / length(sched_times)

(
value=val,
time=elapsedtime/1e9,
bytes=diff.allocd,
gctime=diff.total_time/1e9,
gcstats=diff,
lock_conflicts=lock_conflicts,
compile_time=compile_elapsedtimes[1]/1e9,
recompile_time=compile_elapsedtimes[2]/1e9,
sched_time_avg=sched_time_avg/1e9
)
end
end
end
9 changes: 7 additions & 2 deletions test/scopedvalues.jl
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,16 @@ end
@testset "show" begin
@test sprint(show, ScopedValue{Int}()) == "Base.ScopedValues.ScopedValue{$Int}(undefined)"
@test sprint(show, sval) == "Base.ScopedValues.ScopedValue{$Int}(1)"
@test sprint(show, Core.current_scope()) == "nothing"
if Core.current_scope() === nothing
# Base.runtests uses @timed which introduces a scope for scheduler timing
@test sprint(show, Core.current_scope()) == "nothing"
end
with(sval => 2.0) do
@test sprint(show, sval) == "Base.ScopedValues.ScopedValue{$Int}(2)"
objid = sprint(show, Base.objectid(sval))
@test sprint(show, Core.current_scope()) == "Base.ScopedValues.Scope(Base.ScopedValues.ScopedValue{$Int}@$objid => 2)"
# Base.runtests uses @timed which introduces a scope for scheduler timing
@test startswith(sprint(show, Core.current_scope()), "Base.ScopedValues.Scope(")
@test contains(sprint(show, Core.current_scope()), "Base.ScopedValues.ScopedValue{$Int}@$objid => 2")
end
end

Expand Down