Skip to content

Commit

Permalink
Measure compile time only when using time macros pt.2: handling when …
Browse files Browse the repository at this point in the history
…code under test throws and make compile timing thread-local (#38915)

* ensure compile timing disables in time & timev macros

* make comp measurement switching threadsafe

(cherry picked from commit 158e22f)
  • Loading branch information
IanButterworth authored and KristofferC committed Jan 6, 2021
1 parent 327f298 commit 55154b3
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 64 deletions.
78 changes: 43 additions & 35 deletions base/timing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,41 +115,44 @@ function format_bytes(bytes) # also used by InteractiveUtils
end
end

function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0)
function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, newline=false)
timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6)
length(timestr) < 10 && print(" "^(10 - length(timestr)))
print(timestr, " seconds")
parens = bytes != 0 || allocs != 0 || gctime > 0 || compile_time > 0
parens && print(" (")
if bytes != 0 || allocs != 0
allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), Int64(1000))
if ma == 1
print(Int(allocs), _cnt_units[ma], allocs==1 ? " allocation: " : " allocations: ")
else
print(Ryu.writefixed(Float64(allocs), 2), _cnt_units[ma], " allocations: ")
end
print(format_bytes(bytes))
end
if gctime > 0
str = sprint() do io
print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "")
print(io, timestr, " seconds")
parens = bytes != 0 || allocs != 0 || gctime > 0 || compile_time > 0
parens && print(io, " (")
if bytes != 0 || allocs != 0
print(", ")
allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), Int64(1000))
if ma == 1
print(io, Int(allocs), _cnt_units[ma], allocs==1 ? " allocation: " : " allocations: ")
else
print(io, Ryu.writefixed(Float64(allocs), 2), _cnt_units[ma], " allocations: ")
end
print(io, format_bytes(bytes))
end
print(Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time")
end
if compile_time > 0
if bytes != 0 || allocs != 0 || gctime > 0
print(", ")
if gctime > 0
if bytes != 0 || allocs != 0
print(io, ", ")
end
print(io, Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time")
end
print(Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
if compile_time > 0
if bytes != 0 || allocs != 0 || gctime > 0
print(io, ", ")
end
print(io, Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
end
parens && print(io, ")")
end
parens && print(")")
newline ? println(str) : print(str)
nothing
end

function timev_print(elapsedtime, diff::GC_Diff, compile_time)
allocs = gc_alloc_count(diff)
time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time)
print("\nelapsed time (ns): $elapsedtime\n")
time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, true)
print("elapsed time (ns): $elapsedtime\n")
padded_nonzero_print(diff.total_time, "gc time (ns)")
padded_nonzero_print(diff.allocd, "bytes allocated")
padded_nonzero_print(diff.poolalloc, "pool allocs")
Expand Down Expand Up @@ -200,13 +203,14 @@ macro time(ex)
local stats = gc_num()
local compile_elapsedtime = cumulative_compile_time_ns_before()
local elapsedtime = time_ns()
local val = $(esc(ex))
elapsedtime = time_ns() - elapsedtime
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime
local val = try
$(esc(ex))
finally
elapsedtime = time_ns() - elapsedtime
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime
end
local diff = GC_Diff(gc_num(), stats)
time_print(elapsedtime, diff.allocd, diff.total_time,
gc_alloc_count(diff), compile_elapsedtime)
println()
time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), compile_elapsedtime, true)
val
end
end
Expand Down Expand Up @@ -248,10 +252,14 @@ macro timev(ex)
local stats = gc_num()
local compile_elapsedtime = cumulative_compile_time_ns_before()
local elapsedtime = time_ns()
local val = $(esc(ex))
elapsedtime = time_ns() - elapsedtime
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime
timev_print(elapsedtime, GC_Diff(gc_num(), stats), compile_elapsedtime)
local val = try
$(esc(ex))
finally
elapsedtime = time_ns() - elapsedtime
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime
end
local diff = GC_Diff(gc_num(), stats)
timev_print(elapsedtime, diff, compile_elapsedtime)
val
end
end
Expand Down
14 changes: 8 additions & 6 deletions src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
JL_GC_PUSH1(&src);
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
if (jl_measure_compile_time)
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
compiler_start_time = jl_hrtime();

CompilationPolicy policy = (CompilationPolicy) _policy;
Expand Down Expand Up @@ -417,8 +418,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
}

data->M = std::move(clone);
if (jl_measure_compile_time)
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
if (jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
JL_UNLOCK(&codegen_lock); // Might GC
return (void*)data;
}
Expand Down Expand Up @@ -896,7 +897,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
jl_llvm_functions_t decls;
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
if (jl_measure_compile_time)
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
compiler_start_time = jl_hrtime();
std::tie(m, decls) = jl_emit_code(mi, src, jlrettype, output);

Expand All @@ -921,8 +923,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
m.release(); // the return object `llvmf` will be the owning pointer
}
JL_GC_POP();
if (jl_measure_compile_time)
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
if (jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
JL_UNLOCK(&codegen_lock); // Might GC
if (F)
return F;
Expand Down
9 changes: 4 additions & 5 deletions src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -3139,21 +3139,20 @@ int jl_has_concrete_subtype(jl_value_t *typ)
//static jl_mutex_t typeinf_lock;
#define typeinf_lock codegen_lock

uint8_t jl_measure_compile_time = 0;
uint64_t jl_cumulative_compile_time = 0;
static uint64_t inference_start_time = 0;

JL_DLLEXPORT void jl_typeinf_begin(void)
{
JL_LOCK(&typeinf_lock);
if (jl_measure_compile_time)
if (jl_measure_compile_time[jl_threadid()])
inference_start_time = jl_hrtime();
}

JL_DLLEXPORT void jl_typeinf_end(void)
{
if (typeinf_lock.count == 1 && jl_measure_compile_time)
jl_cumulative_compile_time += (jl_hrtime() - inference_start_time);
int tid = jl_threadid();
if (typeinf_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - inference_start_time);
JL_UNLOCK(&typeinf_lock);
}

Expand Down
38 changes: 22 additions & 16 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,16 @@ void jl_jit_globals(std::map<void *, GlobalVariable*> &globals)
extern "C" JL_DLLEXPORT
uint64_t jl_cumulative_compile_time_ns_before()
{
jl_measure_compile_time = 1;
return jl_cumulative_compile_time;
int tid = jl_threadid();
jl_measure_compile_time[tid] = 1;
return jl_cumulative_compile_time[tid];
}
extern "C" JL_DLLEXPORT
uint64_t jl_cumulative_compile_time_ns_after()
{
jl_measure_compile_time = 0;
return jl_cumulative_compile_time;
int tid = jl_threadid();
jl_measure_compile_time[tid] = 0;
return jl_cumulative_compile_time[tid];
}

// this generates llvm code for the lambda info
Expand Down Expand Up @@ -231,7 +233,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
{
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
if (jl_measure_compile_time)
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
compiler_start_time = jl_hrtime();
jl_codegen_params_t params;
jl_codegen_params_t *pparams = (jl_codegen_params_t*)p;
Expand All @@ -255,8 +258,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
if (success && llvmmod == NULL)
jl_add_to_ee(std::unique_ptr<Module>(into));
}
if (codegen_lock.count == 1 && jl_measure_compile_time)
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
JL_UNLOCK(&codegen_lock);
return success;
}
Expand Down Expand Up @@ -314,7 +317,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
{
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
uint64_t compiler_start_time = 0;
if (jl_measure_compile_time)
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
compiler_start_time = jl_hrtime();
// if we don't have any decls already, try to generate it now
jl_code_info_t *src = NULL;
Expand Down Expand Up @@ -352,8 +356,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
else {
codeinst = NULL;
}
if (codegen_lock.count == 1 && jl_measure_compile_time)
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
JL_UNLOCK(&codegen_lock);
JL_GC_POP();
return codeinst;
Expand All @@ -367,7 +371,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
}
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
if (jl_measure_compile_time)
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
compiler_start_time = jl_hrtime();
if (unspec->invoke == NULL) {
jl_code_info_t *src = NULL;
Expand Down Expand Up @@ -395,8 +400,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
}
JL_GC_POP();
}
if (codegen_lock.count == 1 && jl_measure_compile_time)
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
JL_UNLOCK(&codegen_lock); // Might GC
}

Expand All @@ -419,7 +424,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
// so create an exception here so we can print pretty our lies
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
uint64_t compiler_start_time = 0;
if (jl_measure_compile_time)
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
compiler_start_time = jl_hrtime();
specfptr = (uintptr_t)codeinst->specptr.fptr;
if (specfptr == 0) {
Expand All @@ -444,8 +450,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
}
JL_GC_POP();
}
if (jl_measure_compile_time)
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
if (jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
JL_UNLOCK(&codegen_lock);
}
if (specfptr != 0)
Expand Down
4 changes: 2 additions & 2 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ static inline uint64_t cycleclock(void)

#include "timing.h"

extern uint8_t jl_measure_compile_time;
extern uint64_t jl_cumulative_compile_time;
extern uint8_t *jl_measure_compile_time;
extern uint64_t *jl_cumulative_compile_time;

#ifdef _COMPILER_MICROSOFT_
# define jl_return_address() ((uintptr_t)_ReturnAddress())
Expand Down
4 changes: 4 additions & 0 deletions src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ jl_get_ptls_states_func jl_get_ptls_states_getter(void)
#endif

jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED;
uint8_t *jl_measure_compile_time = NULL;
uint64_t *jl_cumulative_compile_time = NULL;

// return calling thread's ID
// Also update the suspended_threads list in signals-mach when changing the
Expand Down Expand Up @@ -399,6 +401,8 @@ void jl_init_threading(void)
jl_n_threads = (uint64_t)strtol(cp, NULL, 10);
if (jl_n_threads <= 0)
jl_n_threads = 1;
jl_measure_compile_time = realloc( jl_measure_compile_time, jl_n_threads * sizeof *jl_measure_compile_time );
jl_cumulative_compile_time = realloc( jl_cumulative_compile_time, jl_n_threads * sizeof *jl_cumulative_compile_time );
#ifndef __clang_analyzer__
jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*));
#endif
Expand Down

0 comments on commit 55154b3

Please sign in to comment.