Skip to content

Commit

Permalink
Add number of allocations to time/timed macros, fix realloc calculations
Browse files Browse the repository at this point in the history
Display realloc & pool allocations also

Display combined allocations

Refactor code to use structure, clean up

Remove trailing space

Add warnings to keep struct/type GC_Num in sync

Correct Julia structure to match C structure

Make GC_Num immutable, pretty print results, add  verbose macro

Further enhancements of  and  output

Further improvements of output

Improve time output

Use long names for seconds

Fix typo in timed macro

Fix InexactError

Improve time/timed macros, add timev macro
  • Loading branch information
ScottPJones authored and Scott Paul Jones committed May 14, 2015
1 parent ff35d98 commit 7582922
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 45 deletions.
1 change: 1 addition & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1353,6 +1353,7 @@ export
# profiling
@time,
@timed,
@timev,
@elapsed,
@allocated,
@profile,
Expand Down
179 changes: 147 additions & 32 deletions base/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,60 @@
# high-resolution relative time, in nanoseconds
time_ns() = ccall(:jl_hrtime, UInt64, ())

# This type must be kept in sync with the C struct in src/gc.c
immutable GC_Num
allocd ::Int64
freed ::Int64
malloc ::UInt64
realloc ::UInt64
poolalloc ::UInt64
freecall ::UInt64
total_time ::UInt64
total_allocd::UInt64
since_sweep ::UInt64
collect ::Csize_t
pause ::Cint
full_sweep ::Cint
end

gc_num() = ccall(:jl_gc_num, GC_Num, ())

# This type is to represent differences in the counters, so fields may be negative
immutable GC_Diff
allocd ::Int64
freed ::Int64
malloc ::Int64
realloc ::Int64
poolalloc ::Int64
freecall ::Int64
total_time ::Int64
total_allocd::Int64
since_sweep ::Int64
pause ::Int64
full_sweep ::Int64
end

function GC_Diff(new::GC_Num, old::GC_Num)
return GC_Diff(new.allocd + Int64(new.collect) - old.allocd + Int64(old.collect),
new.freed - old.freed,
Int64(new.malloc - old.malloc),
Int64(new.realloc - old.realloc),
Int64(new.poolalloc - old.poolalloc),
Int64(new.freecall - old.freecall),
Int64(new.total_time - old.total_time),
Int64(new.total_allocd - old.total_allocd),
Int64(new.since_sweep - old.since_sweep),
new.pause - old.pause,
new.full_sweep - old.full_sweep)
end


# total time spend in garbage collection, in nanoseconds
gc_time_ns() = ccall(:jl_gc_total_hrtime, UInt64, ())

# total number of bytes allocated so far
gc_bytes() = ccall(:jl_gc_total_bytes, Int64, ())

gc_num_pause() = ccall(:jl_gc_num_pause, Int64, ())
gc_num_full_sweep() = ccall(:jl_gc_num_full_sweep, Int64, ())

function tic()
t0 = time_ns()
task_local_storage(:TIMERS, (t0, get(task_local_storage(), :TIMERS, ())))
Expand All @@ -40,34 +85,107 @@ function toc()
end

# print elapsed time, return expression value
const _units = ["bytes", "kB", "MB"]
function time_print(t, b, g, np, nfs)
i = 1
while b > 1024 && i < length(_units)
b = div(b, 1024)
i += 1
const _mem_units = ["bytes", "KB", "MB", "GB", "TB", "PB"]
const _cnt_units = ["", "k", "M", "G", "T", "P"]
function getunits(value, numunits, factor)
c1 = factor
c2 = c1 * c1
if value <= c1*100 ; return (value, 1) ; end
unit = 2
while value > c2*100 && (unit < numunits)
c1 = c2
c2 *= factor
unit += 1
end
if 0 < g
@printf("elapsed time: %s seconds (%d %s allocated, %.2f%% gc time in %d pauses with %d full sweep)\n", t/1e9, b, _units[i], 100*g/t, np, nfs)
return div(value+(c1>>>1),c1), unit
end

const _sec_units = ["nanoseconds ", "microseconds", "milliseconds", "seconds "]
function ppnanoseconds(value::UInt64)
if value < 1000
return (1, value, 0) # nanoseconds
elseif value < 1000000
mt = 2
elseif value < 1000000000
mt = 3
# round to nearest # of microseconds
value = div(value+500,1000)
elseif value < 1000000000000
mt = 4
# round to nearest # of milliseconds
value = div(value+500000,1000000)
else
@printf("elapsed time: %s seconds (%d %s allocated)\n", t/1e9, b, _units[i])
# round to nearest # of seconds
return (4, div(value+500000000,1000000000), 0)
end
frac::UInt64 = div(value,1000)
return (mt, frac, value-(frac*1000))
end

function printnz(value,str)
if value != 0
blanks = " "[1:16-length(str)]
println("$str:$blanks$value")
end
end

function time_print(elapsedtime, bytes, gctime, allocs)
mt, pptime, fraction = ppnanoseconds(elapsedtime)
(fraction != 0) ? @printf("%4d.%03d %s", pptime, fraction, _sec_units[mt]) : @printf("%8d %s", pptime, _sec_units[mt])
if bytes != 0 || allocs != 0
bytes, mb = getunits(bytes, length(_mem_units), 1024)
allocs, ma = getunits(allocs, length(_cnt_units), 1000)
@printf(" (%d %s allocation%s: %d %s", allocs, _cnt_units[ma], allocs==1 ? "" : "s", bytes, _mem_units[mb])
if gctime > 0
@printf(", %.2f%% gc time", 100*gctime/elapsedtime)
end
print(")")
elseif gctime > 0
@printf(", %.2f%% gc time", 100*gctime/elapsedtime)
end
println()
end

function timev_print(elapsedtime, diff::GC_Diff)
bytes = diff.total_allocd + diff.allocd
allocs = diff.malloc + diff.realloc + diff.poolalloc
time_print(elapsedtime, bytes, diff.total_time, allocs)
print("elapsed time: $elapsedtime nanoseconds\n")
printnz(diff.total_time, "gc time")
printnz(bytes, "bytes allocated")
printnz(diff.allocd, "allocated")
printnz(diff.freed, "freed")
printnz(diff.malloc, "mallocs")
printnz(diff.realloc, "reallocs")
printnz(diff.poolalloc, "poolallocs")
printnz(diff.freecall, "free calls")
printnz(diff.total_allocd, "total allocated")
printnz(diff.since_sweep, "since sweep")
printnz(diff.pause, "pause")
printnz(diff.full_sweep, "full sweep")
end

macro time(ex)
quote
local b0 = gc_bytes()
local t0 = time_ns()
local g0 = gc_time_ns()
local n0 = gc_num_pause()
local nfs0 = gc_num_full_sweep()
local stats = gc_num()
local elapsedtime = time_ns()
local val = $(esc(ex))
local nfs1 = gc_num_full_sweep()
local n1 = gc_num_pause()
local g1 = gc_time_ns()
local t1 = time_ns()
local b1 = gc_bytes()
time_print(t1-t0, b1-b0, g1-g0, n1-n0, nfs1-nfs0)
elapsedtime = time_ns() - elapsedtime
local diff = GC_Diff(gc_num(), stats)
local bytes = diff.total_allocd + diff.allocd
local allocs = diff.malloc + diff.realloc + diff.poolalloc
time_print(elapsedtime, bytes, diff.total_time, allocs)
val
end
end

macro timev(ex)
quote
local stats = gc_num()
local elapsedtime = time_ns()
local val = $(esc(ex))
elapsedtime = time_ns() - elapsedtime
timev_print(elapsedtime, GC_Diff(gc_num(), stats))
val
end
end
Expand All @@ -89,8 +207,7 @@ macro allocated(ex)
function f()
b0 = gc_bytes()
$(esc(ex))
b1 = gc_bytes()
b1-b0
gc_bytes() - b0
end
f()
end
Expand All @@ -100,14 +217,12 @@ end
# print nothing, return value, elapsed time, bytes allocated & gc time
macro timed(ex)
quote
local b0 = gc_bytes()
local t0 = time_ns()
local g0 = gc_time_ns()
local stats = gc_num()
local elapsedtime = time_ns()
local val = $(esc(ex))
local g1 = gc_time_ns()
local t1 = time_ns()
local b1 = gc_bytes()
val, (t1-t0)/1e9, b1-b0, (g1-g0)/1e9
elapsedtime = time_ns() - elapsedtime
diff = GC_Diff(gc_num(), stats)
val, elapsedtime/1e9, diff.total_allocd + diff.allocd, diff.total_time/1e9, diff
end
end

Expand Down
55 changes: 42 additions & 13 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,34 @@ extern "C" {
#define jl_valueof(v) (&((jl_taggedvalue_t*)(v))->value)

int jl_in_gc; // referenced from switchto task.c
static int64_t allocd_bytes;
static int64_t freed_bytes;

// This struct must be kept in sync with the Julia type of the same name in base/util.jl
typedef struct {
int64_t allocd;
int64_t freed;
uint64_t malloc;
uint64_t realloc;
uint64_t poolalloc;
uint64_t freecall;
uint64_t total_time;
uint64_t total_allocd;
uint64_t since_sweep;
size_t collect;
int pause;
int full_sweep;
} GC_Num;

static GC_Num gc_num = {0,0,0,0,0,0,0,0,0,0,0,0};

#define collect_interval gc_num.collect
#define n_pause gc_num.pause
#define n_full_sweep gc_num.full_sweep
#define allocd_bytes gc_num.allocd
#define freed_bytes gc_num.freed
#define total_gc_time gc_num.total_time
#define total_allocd_bytes gc_num.total_allocd
#define allocd_bytes_since_sweep gc_num.since_sweep

static long system_page_size;

// malloc wrappers, aligned allocation
Expand Down Expand Up @@ -310,7 +336,6 @@ static size_t max_collect_interval = 1250000000UL;
#define default_collect_interval (3200*1024*sizeof(void*))
static size_t max_collect_interval = 500000000UL;
#endif
static size_t collect_interval;

#define HEAP_DECL static

Expand Down Expand Up @@ -369,9 +394,6 @@ static bigval_t *big_objects_marked = NULL;

// global variables for GC stats

static int64_t total_allocd_bytes = 0;
static int64_t allocd_bytes_since_sweep = 0;
static uint64_t total_gc_time = 0;
#define NS_TO_S(t) ((double)(t/1000)/(1000*1000))
#define NS2MS(t) ((double)(t/1000)/1000)
static int64_t live_bytes = 0;
Expand All @@ -391,8 +413,6 @@ static uint64_t total_sweep_time=0;
static uint64_t total_mark_time=0;
static uint64_t total_fin_time=0;
#endif
static int n_pause = 0;
static int n_full_sweep = 0;
int sweeping = 0;

// manipulating mark bits
Expand Down Expand Up @@ -1032,6 +1052,7 @@ static inline void *__pool_alloc(pool_t* p, int osize, int end_offset)
jl_gc_collect(0);
//allocd_bytes += osize;
}
gc_num.poolalloc++;
// first try to use the freelist
v = p->freelist;
if (v) {
Expand Down Expand Up @@ -2071,8 +2092,7 @@ DLLEXPORT int jl_gc_is_enabled(void) { return is_gc_enabled; }

DLLEXPORT int64_t jl_gc_total_bytes(void) { return total_allocd_bytes + allocd_bytes + collect_interval; }
DLLEXPORT uint64_t jl_gc_total_hrtime(void) { return total_gc_time; }
DLLEXPORT int64_t jl_gc_num_pause(void) { return n_pause; }
DLLEXPORT int64_t jl_gc_num_full_sweep(void) { return n_full_sweep; }
DLLEXPORT GC_Num jl_gc_num(void) { return gc_num; }

int64_t diff_gc_total_bytes(void)
{
Expand Down Expand Up @@ -2665,6 +2685,7 @@ DLLEXPORT jl_value_t *allocobj(size_t sz)
if (allocsz < sz) // overflow in adding offs, size was "negative"
jl_throw(jl_memory_exception);
allocd_bytes += allocsz;
gc_num.alloc++;
return jl_valueof(malloc(allocsz));
}
int64_t diff_gc_total_bytes(void)
Expand All @@ -2688,6 +2709,7 @@ DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
{
maybe_collect();
allocd_bytes += sz;
gc_num.malloc++;
void *b = malloc(sz);
if (b == NULL)
jl_throw(jl_memory_exception);
Expand All @@ -2698,12 +2720,16 @@ DLLEXPORT void jl_gc_counted_free(void *p, size_t sz)
{
free(p);
freed_bytes += sz;
gc_num.freecall++;
}

DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
{
maybe_collect();
allocd_bytes += (sz-old);

if (sz < old) freed_bytes += (old - sz);
else allocd_bytes += (sz - old);
gc_num.realloc++;
void *b = realloc(p, sz);
if (b == NULL)
jl_throw(jl_memory_exception);
Expand All @@ -2713,10 +2739,11 @@ DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t
DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
{
maybe_collect();
allocd_bytes += sz;
size_t allocsz = LLT_ALIGN(sz, 16);
if (allocsz < sz) // overflow in adding offs, size was "negative"
jl_throw(jl_memory_exception);
allocd_bytes += allocsz;
gc_num.malloc++;
void *b = malloc_a16(allocsz);
if (b == NULL)
jl_throw(jl_memory_exception);
Expand All @@ -2738,7 +2765,9 @@ DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, int isal
}
else
#endif
allocd_bytes += allocsz - oldsz;
if (allocsz < oldsz) freed_bytes += (oldsz - allocsz);
else allocd_bytes += (allocsz - oldsz);
gc_num.realloc++;

void *b;
if (isaligned)
Expand Down

0 comments on commit 7582922

Please sign in to comment.