Skip to content

Commit

Permalink
Merge pull request #11186 from ScottPJones/spj/time
Browse files Browse the repository at this point in the history
Add number of allocations to time/timed macros, fix realloc calculations
  • Loading branch information
tkelman committed May 16, 2015
2 parents f043f86 + c2ae378 commit 05702c7
Show file tree
Hide file tree
Showing 3 changed files with 194 additions and 45 deletions.
1 change: 1 addition & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1353,6 +1353,7 @@ export
# profiling
@time,
@timed,
@timev,
@elapsed,
@allocated,
@profile,
Expand Down
179 changes: 147 additions & 32 deletions base/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,60 @@
# high-resolution relative time, in nanoseconds
time_ns() = ccall(:jl_hrtime, UInt64, ())

# This type must be kept in sync with the C struct in src/gc.c
immutable GC_Num
allocd ::Int64
freed ::Int64
malloc ::UInt64
realloc ::UInt64
poolalloc ::UInt64
freecall ::UInt64
total_time ::UInt64
total_allocd::UInt64
since_sweep ::UInt64
collect ::Csize_t
pause ::Cint
full_sweep ::Cint
end

gc_num() = ccall(:jl_gc_num, GC_Num, ())

# This type is to represent differences in the counters, so fields may be negative
immutable GC_Diff
allocd ::Int64
freed ::Int64
malloc ::Int64
realloc ::Int64
poolalloc ::Int64
freecall ::Int64
total_time ::Int64
total_allocd::Int64
since_sweep ::Int64
pause ::Int64
full_sweep ::Int64
end

function GC_Diff(new::GC_Num, old::GC_Num)
return GC_Diff((new.allocd + Int64(new.collect)) - (old.allocd + Int64(old.collect)),
new.freed - old.freed,
Int64(new.malloc - old.malloc),
Int64(new.realloc - old.realloc),
Int64(new.poolalloc - old.poolalloc),
Int64(new.freecall - old.freecall),
Int64(new.total_time - old.total_time),
Int64(new.total_allocd - old.total_allocd),
Int64(new.since_sweep - old.since_sweep),
new.pause - old.pause,
new.full_sweep - old.full_sweep)
end


# total time spend in garbage collection, in nanoseconds
gc_time_ns() = ccall(:jl_gc_total_hrtime, UInt64, ())

# total number of bytes allocated so far
gc_bytes() = ccall(:jl_gc_total_bytes, Int64, ())

gc_num_pause() = ccall(:jl_gc_num_pause, Int64, ())
gc_num_full_sweep() = ccall(:jl_gc_num_full_sweep, Int64, ())

function tic()
t0 = time_ns()
task_local_storage(:TIMERS, (t0, get(task_local_storage(), :TIMERS, ())))
Expand All @@ -40,34 +85,107 @@ function toc()
end

# print elapsed time, return expression value
const _units = ["bytes", "kB", "MB"]
function time_print(t, b, g, np, nfs)
i = 1
while b > 1024 && i < length(_units)
b = div(b, 1024)
i += 1
const _mem_units = ["bytes", "KB", "MB", "GB", "TB", "PB"]
const _cnt_units = ["", " k", " M", " G", " T", " P"]
function prettyprint_getunits(value, numunits, factor) # this really should be a private function
c1 = factor
c2 = c1 * c1
if value <= c1*100 ; return (value, 1) ; end
unit = 2
while value > c2*100 && (unit < numunits)
c1 = c2
c2 *= factor
unit += 1
end
if 0 < g
@printf("elapsed time: %s seconds (%d %s allocated, %.2f%% gc time in %d pauses with %d full sweep)\n", t/1e9, b, _units[i], 100*g/t, np, nfs)
return div(value+(c1>>>1),c1), unit
end

const _sec_units = ["nanoseconds ", "microseconds", "milliseconds", "seconds "]
function prettyprint_nanoseconds(value::UInt64) # this really should be a private function
if value < 1000
return (1, value, 0) # nanoseconds
elseif value < 1000000
mt = 2
elseif value < 1000000000
mt = 3
# round to nearest # of microseconds
value = div(value+500,1000)
elseif value < 1000000000000
mt = 4
# round to nearest # of milliseconds
value = div(value+500000,1000000)
else
@printf("elapsed time: %s seconds (%d %s allocated)\n", t/1e9, b, _units[i])
# round to nearest # of seconds
return (4, div(value+500000000,1000000000), 0)
end
frac::UInt64 = div(value,1000)
return (mt, frac, value-(frac*1000))
end

function padded_nonzero_print(value,str) # this really should be a private function
if value != 0
blanks = " "[1:16-length(str)]
println("$str:$blanks$value")
end
end

function time_print(elapsedtime, bytes, gctime, allocs)
mt, pptime, fraction = prettyprint_nanoseconds(elapsedtime)
(fraction != 0) ? @printf("%4d.%03d %s", pptime, fraction, _sec_units[mt]) : @printf("%8d %s", pptime, _sec_units[mt])
if bytes != 0 || allocs != 0
bytes, mb = prettyprint_getunits(bytes, length(_mem_units), 1024)
allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), 1000)
@printf(" (%d%s allocation%s: %d %s", allocs, _cnt_units[ma], allocs==1 ? "" : "s", bytes, _mem_units[mb])
if gctime > 0
@printf(", %.2f%% gc time", 100*gctime/elapsedtime)
end
print(")")
elseif gctime > 0
@printf(", %.2f%% gc time", 100*gctime/elapsedtime)
end
println()
end

function timev_print(elapsedtime, diff::GC_Diff)
bytes = diff.total_allocd + diff.allocd
allocs = diff.malloc + diff.realloc + diff.poolalloc
time_print(elapsedtime, bytes, diff.total_time, allocs)
print("elapsed time: $elapsedtime nanoseconds\n")
padded_nonzero_print(diff.total_time, "gc time")
padded_nonzero_print(bytes, "bytes allocated")
padded_nonzero_print(diff.allocd, "allocated")
padded_nonzero_print(diff.freed, "freed")
padded_nonzero_print(diff.malloc, "mallocs")
padded_nonzero_print(diff.realloc, "reallocs")
padded_nonzero_print(diff.poolalloc, "poolallocs")
padded_nonzero_print(diff.freecall, "free calls")
padded_nonzero_print(diff.total_allocd, "total allocated")
padded_nonzero_print(diff.since_sweep, "since sweep")
padded_nonzero_print(diff.pause, "pause")
padded_nonzero_print(diff.full_sweep, "full sweep")
end

macro time(ex)
quote
local b0 = gc_bytes()
local t0 = time_ns()
local g0 = gc_time_ns()
local n0 = gc_num_pause()
local nfs0 = gc_num_full_sweep()
local stats = gc_num()
local elapsedtime = time_ns()
local val = $(esc(ex))
local nfs1 = gc_num_full_sweep()
local n1 = gc_num_pause()
local g1 = gc_time_ns()
local t1 = time_ns()
local b1 = gc_bytes()
time_print(t1-t0, b1-b0, g1-g0, n1-n0, nfs1-nfs0)
elapsedtime = time_ns() - elapsedtime
local diff = GC_Diff(gc_num(), stats)
local bytes = diff.total_allocd + diff.allocd
local allocs = diff.malloc + diff.realloc + diff.poolalloc
time_print(elapsedtime, bytes, diff.total_time, allocs)
val
end
end

macro timev(ex)
quote
local stats = gc_num()
local elapsedtime = time_ns()
local val = $(esc(ex))
elapsedtime = time_ns() - elapsedtime
timev_print(elapsedtime, GC_Diff(gc_num(), stats))
val
end
end
Expand All @@ -89,8 +207,7 @@ macro allocated(ex)
function f()
b0 = gc_bytes()
$(esc(ex))
b1 = gc_bytes()
b1-b0
gc_bytes() - b0
end
f()
end
Expand All @@ -100,14 +217,12 @@ end
# print nothing, return value, elapsed time, bytes allocated & gc time
macro timed(ex)
quote
local b0 = gc_bytes()
local t0 = time_ns()
local g0 = gc_time_ns()
local stats = gc_num()
local elapsedtime = time_ns()
local val = $(esc(ex))
local g1 = gc_time_ns()
local t1 = time_ns()
local b1 = gc_bytes()
val, (t1-t0)/1e9, b1-b0, (g1-g0)/1e9
elapsedtime = time_ns() - elapsedtime
diff = GC_Diff(gc_num(), stats)
val, elapsedtime/1e9, diff.total_allocd + diff.allocd, diff.total_time/1e9, diff
end
end

Expand Down
59 changes: 46 additions & 13 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,34 @@ extern "C" {
#define jl_valueof(v) (&((jl_taggedvalue_t*)(v))->value)

int jl_in_gc; // referenced from switchto task.c
static int64_t allocd_bytes;
static int64_t freed_bytes;

// This struct must be kept in sync with the Julia type of the same name in base/util.jl
typedef struct {
int64_t allocd;
int64_t freed;
uint64_t malloc;
uint64_t realloc;
uint64_t poolalloc;
uint64_t freecall;
uint64_t total_time;
uint64_t total_allocd;
uint64_t since_sweep;
size_t collect;
int pause;
int full_sweep;
} GC_Num;

static GC_Num gc_num = {0,0,0,0,0,0,0,0,0,0,0,0};

#define collect_interval gc_num.collect
#define n_pause gc_num.pause
#define n_full_sweep gc_num.full_sweep
#define allocd_bytes gc_num.allocd
#define freed_bytes gc_num.freed
#define total_gc_time gc_num.total_time
#define total_allocd_bytes gc_num.total_allocd
#define allocd_bytes_since_sweep gc_num.since_sweep

static long system_page_size;

// malloc wrappers, aligned allocation
Expand Down Expand Up @@ -310,7 +336,6 @@ static size_t max_collect_interval = 1250000000UL;
#define default_collect_interval (3200*1024*sizeof(void*))
static size_t max_collect_interval = 500000000UL;
#endif
static size_t collect_interval;

#define HEAP_DECL static

Expand Down Expand Up @@ -369,9 +394,6 @@ static bigval_t *big_objects_marked = NULL;

// global variables for GC stats

static int64_t total_allocd_bytes = 0;
static int64_t allocd_bytes_since_sweep = 0;
static uint64_t total_gc_time = 0;
#define NS_TO_S(t) ((double)(t/1000)/(1000*1000))
#define NS2MS(t) ((double)(t/1000)/1000)
static int64_t live_bytes = 0;
Expand All @@ -391,8 +413,6 @@ static uint64_t total_sweep_time=0;
static uint64_t total_mark_time=0;
static uint64_t total_fin_time=0;
#endif
static int n_pause = 0;
static int n_full_sweep = 0;
int sweeping = 0;

// manipulating mark bits
Expand Down Expand Up @@ -1032,6 +1052,7 @@ static inline void *__pool_alloc(pool_t* p, int osize, int end_offset)
jl_gc_collect(0);
//allocd_bytes += osize;
}
gc_num.poolalloc++;
// first try to use the freelist
v = p->freelist;
if (v) {
Expand Down Expand Up @@ -2071,8 +2092,7 @@ DLLEXPORT int jl_gc_is_enabled(void) { return is_gc_enabled; }

DLLEXPORT int64_t jl_gc_total_bytes(void) { return total_allocd_bytes + allocd_bytes + collect_interval; }
DLLEXPORT uint64_t jl_gc_total_hrtime(void) { return total_gc_time; }
DLLEXPORT int64_t jl_gc_num_pause(void) { return n_pause; }
DLLEXPORT int64_t jl_gc_num_full_sweep(void) { return n_full_sweep; }
DLLEXPORT GC_Num jl_gc_num(void) { return gc_num; }

int64_t diff_gc_total_bytes(void)
{
Expand Down Expand Up @@ -2665,6 +2685,7 @@ DLLEXPORT jl_value_t *allocobj(size_t sz)
if (allocsz < sz) // overflow in adding offs, size was "negative"
jl_throw(jl_memory_exception);
allocd_bytes += allocsz;
gc_num.alloc++;
return jl_valueof(malloc(allocsz));
}
int64_t diff_gc_total_bytes(void)
Expand All @@ -2688,6 +2709,7 @@ DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
{
maybe_collect();
allocd_bytes += sz;
gc_num.malloc++;
void *b = malloc(sz);
if (b == NULL)
jl_throw(jl_memory_exception);
Expand All @@ -2698,12 +2720,18 @@ DLLEXPORT void jl_gc_counted_free(void *p, size_t sz)
{
free(p);
freed_bytes += sz;
gc_num.freecall++;
}

DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
{
maybe_collect();
allocd_bytes += (sz-old);

if (sz < old)
freed_bytes += (old - sz);
else
allocd_bytes += (sz - old);
gc_num.realloc++;
void *b = realloc(p, sz);
if (b == NULL)
jl_throw(jl_memory_exception);
Expand All @@ -2713,10 +2741,11 @@ DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t
DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
{
maybe_collect();
allocd_bytes += sz;
size_t allocsz = LLT_ALIGN(sz, 16);
if (allocsz < sz) // overflow in adding offs, size was "negative"
jl_throw(jl_memory_exception);
allocd_bytes += allocsz;
gc_num.malloc++;
void *b = malloc_a16(allocsz);
if (b == NULL)
jl_throw(jl_memory_exception);
Expand All @@ -2738,7 +2767,11 @@ DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, int isal
}
else
#endif
allocd_bytes += allocsz - oldsz;
if (allocsz < oldsz)
freed_bytes += (oldsz - allocsz);
else
allocd_bytes += (allocsz - oldsz);
gc_num.realloc++;

void *b;
if (isaligned)
Expand Down

0 comments on commit 05702c7

Please sign in to comment.