Skip to content

Commit

Permalink
More efficient object finalising implementation
Browse files Browse the repository at this point in the history
Thanks to Sylvan and Andy Turley for their ideas.

* The old finaliser implementation used the object hashmap to keep
  track of finalisers that needed to be run. This was not ideal
  because while the hashmap provides constant time operations,
  the constant time was still much larger than the time for a normal
  non-finaliser allocation. Additionally, keeping track of finalisers
  in the object hashmap meant that every object with a finaliser
  would be added to the object hashmap even if it was only transient
  and never sent to another actor. This, once again, was different
  from normal allocations where the objects wouldn't be added to
  the hashmap until they were sent to another actor. The benchmark
  using ponybench showed that objects with finalisers were about
  1 order of magnitude slower than objects without finalisers due
  to the overhead of using the object hashmap for tracking them.
* The new finaliser implementation keeps the finaliser information
  in the chunk where the memory was allocated from. This is exactly
  the same as how non-finaliser allocations are tracked except for
  the additional work to keep track of the finaliser. The resulting
  benefit is that objects with finalisers will only get added to the
  object hashmap under the same circumstances as objects without
  finalisers. This gives us an increase in performance by 1 order of
  magnitude so that now objects with finalisers have the same allocation
  performance as objects without finalisers.
* Keep a finaliser bitmap in chunk_t instead of an array of finaliser
  pointers. Run the finaliser from the pony_type_t->final_fn instead
  of storing/using the function passed in to pony_alloc_final.
* Add pony_alloc_small_final and pony_alloc_large_final functions
  to avoid having to go through a branch and another function call
  to allocate memory with a finaliser.
* Update compiler to call the appropriate one of pony_alloc_small_final
  or pony_alloc_large_final instead of pony_alloc_final.

Future work:

* It should be possible to remove pony_alloc_*_final functions altogether and
  update pony_alloc, pony_alloc_small, pony_alloc_large to take
  a boolean as to whether a finaliser exists for the type or not.
  This would also require changes to the compiler to generate the
  appropriate boolean true/false for when a finaliser exists or not.
  • Loading branch information
dipinhora committed Mar 10, 2017
1 parent 7487c51 commit e2ae104
Show file tree
Hide file tree
Showing 13 changed files with 240 additions and 140 deletions.
10 changes: 6 additions & 4 deletions benchmark/libponyrt/mem/heap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,12 @@ BENCHMARK_DEFINE_F(HeapBench, HeapAlloc)(benchmark::State& st) {
if(st.range(0) > HEAP_MAX)
{
st.ResumeTiming();
ponyint_heap_alloc_large(actor, &_heap, st.range(0));
ponyint_heap_alloc_large(actor, &_heap, st.range(0), false);
st.PauseTiming();
} else {
st.ResumeTiming();
ponyint_heap_alloc_small(actor, &_heap, ponyint_heap_index(st.range(0)));
ponyint_heap_alloc_small(actor, &_heap, ponyint_heap_index(st.range(0)),
false);
st.PauseTiming();
}
ponyint_heap_destroy(&_heap);
Expand All @@ -58,9 +59,10 @@ BENCHMARK_DEFINE_F(HeapBench, HeapDestroy)(benchmark::State& st) {
while (st.KeepRunning()) {
st.PauseTiming();
if(st.range(0) > HEAP_MAX)
ponyint_heap_alloc_large(actor, &_heap, st.range(0));
ponyint_heap_alloc_large(actor, &_heap, st.range(0), false);
else
ponyint_heap_alloc_small(actor, &_heap, ponyint_heap_index(st.range(0)));
ponyint_heap_alloc_small(actor, &_heap, ponyint_heap_index(st.range(0)),
false);
st.ResumeTiming();
ponyint_heap_destroy(&_heap);
st.PauseTiming();
Expand Down
49 changes: 46 additions & 3 deletions src/libponyc/codegen/codegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -428,11 +428,10 @@ static void init_runtime(compile_t* c)
LLVMSetDereferenceableOrNull(value, 0, HEAP_MIN);
#endif

// i8* pony_alloc_final(i8*, intptr, c->final_fn)
// i8* pony_alloc_final(i8*, intptr)
params[0] = c->void_ptr;
params[1] = c->intptr;
params[2] = c->final_fn;
type = LLVMFunctionType(c->void_ptr, params, 3, false);
type = LLVMFunctionType(c->void_ptr, params, 2, false);
value = LLVMAddFunction(c->module, "pony_alloc_final", type);
#if PONY_LLVM >= 309
LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
Expand All @@ -451,6 +450,50 @@ static void init_runtime(compile_t* c)
LLVMSetDereferenceableOrNull(value, 0, HEAP_MIN);
#endif

// i8* pony_alloc_small_final(i8*, i32)
params[0] = c->void_ptr;
params[1] = c->i32;
type = LLVMFunctionType(c->void_ptr, params, 2, false);
value = LLVMAddFunction(c->module, "pony_alloc_small_final", type);
#if PONY_LLVM >= 309
LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
inacc_or_arg_mem_attr);
LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, noalias_attr);
LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex,
deref_alloc_small_attr);
LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, align_heap_attr);
#else
LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
# if PONY_LLVM >= 308
LLVMSetInaccessibleMemOrArgMemOnly(value);
# endif
LLVMSetReturnNoAlias(value);
LLVMSetDereferenceable(value, 0, HEAP_MIN);
#endif

// i8* pony_alloc_large_final(i8*, intptr)
params[0] = c->void_ptr;
params[1] = c->intptr;
type = LLVMFunctionType(c->void_ptr, params, 2, false);
value = LLVMAddFunction(c->module, "pony_alloc_large_final", type);
#if PONY_LLVM >= 309
LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
inacc_or_arg_mem_attr);
LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, noalias_attr);
LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex,
deref_alloc_large_attr);
LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, align_heap_attr);
#else
LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
# if PONY_LLVM >= 308
LLVMSetInaccessibleMemOrArgMemOnly(value);
# endif
LLVMSetReturnNoAlias(value);
LLVMSetDereferenceable(value, 0, HEAP_MAX << 1);
#endif

// $message* pony_alloc_msg(i32, i32)
params[0] = c->i32;
params[1] = c->i32;
Expand Down
21 changes: 10 additions & 11 deletions src/libponyc/codegen/gencall.c
Original file line number Diff line number Diff line change
Expand Up @@ -1142,21 +1142,20 @@ LLVMValueRef gencall_allocstruct(compile_t* c, reach_type_t* t)
if(size == 0)
size = 1;

if(t->final_fn == NULL)
if(size <= HEAP_MAX)
{
if(size <= HEAP_MAX)
{
uint32_t index = ponyint_heap_index(size);
args[1] = LLVMConstInt(c->i32, index, false);
uint32_t index = ponyint_heap_index(size);
args[1] = LLVMConstInt(c->i32, index, false);
if(t->final_fn == NULL)
result = gencall_runtime(c, "pony_alloc_small", args, 2, "");
} else {
args[1] = LLVMConstInt(c->intptr, size, false);
result = gencall_runtime(c, "pony_alloc_large", args, 2, "");
}
else
result = gencall_runtime(c, "pony_alloc_small_final", args, 2, "");
} else {
args[1] = LLVMConstInt(c->intptr, size, false);
args[2] = LLVMConstBitCast(t->final_fn, c->final_fn);
result = gencall_runtime(c, "pony_alloc_final", args, 3, "");
if(t->final_fn == NULL)
result = gencall_runtime(c, "pony_alloc_large", args, 2, "");
else
result = gencall_runtime(c, "pony_alloc_large_final", args, 2, "");
}

result = LLVMBuildBitCast(c->builder, result, t->structure_ptr, "");
Expand Down
34 changes: 24 additions & 10 deletions src/libponyrt/actor/actor.c
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,8 @@ void ponyint_actor_final(pony_ctx_t* ctx, pony_actor_t* actor)
actor->type->final(actor);

// Run all outstanding object finalisers.
ponyint_gc_final(ctx, &actor->gc);
ponyint_heap_final(&actor->heap);


// Restore the current actor.
ctx->current = prev;
Expand Down Expand Up @@ -382,38 +383,51 @@ PONY_API void* pony_alloc(pony_ctx_t* ctx, size_t size)
{
DTRACE2(HEAP_ALLOC, (uintptr_t)ctx->scheduler, size);

return ponyint_heap_alloc(ctx->current, &ctx->current->heap, size);
return ponyint_heap_alloc(ctx->current, &ctx->current->heap, size, false);
}

PONY_API void* pony_alloc_small(pony_ctx_t* ctx, uint32_t sizeclass)
{
DTRACE2(HEAP_ALLOC, (uintptr_t)ctx->scheduler, HEAP_MIN << sizeclass);

return ponyint_heap_alloc_small(ctx->current, &ctx->current->heap, sizeclass);
return ponyint_heap_alloc_small(ctx->current, &ctx->current->heap, sizeclass, false);
}

PONY_API void* pony_alloc_large(pony_ctx_t* ctx, size_t size)
{
DTRACE2(HEAP_ALLOC, (uintptr_t)ctx->scheduler, size);

return ponyint_heap_alloc_large(ctx->current, &ctx->current->heap, size);
return ponyint_heap_alloc_large(ctx->current, &ctx->current->heap, size, false);
}

PONY_API void* pony_realloc(pony_ctx_t* ctx, void* p, size_t size)
{
DTRACE2(HEAP_ALLOC, (uintptr_t)ctx->scheduler, size);

return ponyint_heap_realloc(ctx->current, &ctx->current->heap, p, size);
return ponyint_heap_realloc(ctx->current, &ctx->current->heap, p, size, false);
}

PONY_API void* pony_alloc_final(pony_ctx_t* ctx, size_t size)
{
DTRACE2(HEAP_ALLOC, (uintptr_t)ctx->scheduler, size);

return ponyint_heap_alloc(ctx->current, &ctx->current->heap, size, true);
}

void* pony_alloc_small_final(pony_ctx_t* ctx, uint32_t sizeclass)
{
DTRACE2(HEAP_ALLOC, (uintptr_t)ctx->scheduler, HEAP_MIN << sizeclass);

return ponyint_heap_alloc_small(ctx->current, &ctx->current->heap, sizeclass,
true);
}

PONY_API void* pony_alloc_final(pony_ctx_t* ctx, size_t size,
pony_final_fn final)
void* pony_alloc_large_final(pony_ctx_t* ctx, size_t size)
{
DTRACE2(HEAP_ALLOC, (uintptr_t)ctx->scheduler, size);

void* p = ponyint_heap_alloc(ctx->current, &ctx->current->heap, size);
ponyint_gc_register_final(ctx, p, final);
return p;
return ponyint_heap_alloc_large(ctx->current, &ctx->current->heap, size,
true);
}

PONY_API void pony_triggergc(pony_actor_t* actor)
Expand Down
41 changes: 1 addition & 40 deletions src/libponyrt/gc/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,7 @@ void ponyint_gc_discardstack(pony_ctx_t* ctx)

void ponyint_gc_sweep(pony_ctx_t* ctx, gc_t* gc)
{
gc->finalisers -= ponyint_objectmap_sweep(&gc->local);
ponyint_objectmap_sweep(&gc->local);
gc->delta = ponyint_actormap_sweep(ctx, &gc->foreign, gc->mark, gc->delta);
}

Expand Down Expand Up @@ -718,45 +718,6 @@ void ponyint_gc_sendrelease_manual(pony_ctx_t* ctx)
pony_assert(ponyint_actormap_size(&ctx->acquire) == 0);
}

void ponyint_gc_register_final(pony_ctx_t* ctx, void* p, pony_final_fn final)
{
if(!ctx->finalising)
{
// If we aren't finalising an actor, register the finaliser.
gc_t* gc = ponyint_actor_gc(ctx->current);
ponyint_objectmap_register_final(&gc->local, p, final, gc->mark);
gc->finalisers++;
} else {
// Otherwise, put the finaliser on the gc stack.
recurse(ctx, p, final);
}
}

void ponyint_gc_final(pony_ctx_t* ctx, gc_t* gc)
{
if(gc->finalisers == 0)
return;

// Set the finalising flag.
ctx->finalising = true;

// Run all finalisers in the object map.
ponyint_objectmap_final(&gc->local);

// Finalise any objects that were created during finalisation.
pony_final_fn f;
void *p;

while(ctx->stack != NULL)
{
ctx->stack = ponyint_gcstack_pop(ctx->stack, (void**)&f);
ctx->stack = ponyint_gcstack_pop(ctx->stack, &p);
f(p);
}

ctx->finalising = false;
}

void ponyint_gc_done(gc_t* gc)
{
gc->mark++;
Expand Down
5 changes: 0 additions & 5 deletions src/libponyrt/gc/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ typedef struct gc_t
uint32_t mark;
uint32_t rc_mark;
size_t rc;
size_t finalisers;
objectmap_t local;
actormap_t foreign;
deltamap_t* delta;
Expand Down Expand Up @@ -75,10 +74,6 @@ size_t ponyint_gc_rc(gc_t* gc);

deltamap_t* ponyint_gc_delta(gc_t* gc);

void ponyint_gc_register_final(pony_ctx_t* ctx, void* p, pony_final_fn final);

void ponyint_gc_final(pony_ctx_t* ctx, gc_t* gc);

void ponyint_gc_done(gc_t* gc);

void ponyint_gc_destroy(gc_t* gc);
Expand Down
40 changes: 1 addition & 39 deletions src/libponyrt/gc/objectmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ static object_t* object_alloc(void* address, uint32_t mark)
{
object_t* obj = (object_t*)POOL_ALLOC(object_t);
obj->address = address;
obj->final = NULL;
obj->rc = 0;
obj->immutable = false;

Expand Down Expand Up @@ -59,34 +58,12 @@ object_t* ponyint_objectmap_getorput(objectmap_t* map, void* address,
return obj;
}

object_t* ponyint_objectmap_register_final(objectmap_t* map, void* address,
pony_final_fn final, uint32_t mark)
void ponyint_objectmap_sweep(objectmap_t* map)
{
object_t* obj = ponyint_objectmap_getorput(map, address, mark);
obj->final = final;
return obj;
}

void ponyint_objectmap_final(objectmap_t* map)
{
size_t i = HASHMAP_BEGIN;
object_t* obj;

while((obj = ponyint_objectmap_next(map, &i)) != NULL)
{
if(obj->final != NULL)
obj->final(obj->address);
}
}

size_t ponyint_objectmap_sweep(objectmap_t* map)
{
size_t count = 0;
size_t i = HASHMAP_BEGIN;
object_t* obj;
bool needs_optimize = false;


while((obj = ponyint_objectmap_next(map, &i)) != NULL)
{
void* p = obj->address;
Expand All @@ -96,19 +73,6 @@ size_t ponyint_objectmap_sweep(objectmap_t* map)
chunk_t* chunk = (chunk_t*)ponyint_pagemap_get(p);
ponyint_heap_mark_shallow(chunk, p);
} else {
if(obj->final != NULL)
{
// If we are not free in the heap, don't run the finaliser and don't
// remove this entry from the object map.
chunk_t* chunk = (chunk_t*)ponyint_pagemap_get(p);

if(ponyint_heap_ismarked(chunk, p))
continue;

obj->final(p);
count++;
}

ponyint_objectmap_clearindex(map, i);
needs_optimize = true;

Expand All @@ -118,6 +82,4 @@ size_t ponyint_objectmap_sweep(objectmap_t* map)

if(needs_optimize)
ponyint_objectmap_optimize(map);

return count;
}
8 changes: 1 addition & 7 deletions src/libponyrt/gc/objectmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ PONY_EXTERN_C_BEGIN
typedef struct object_t
{
void* address;
pony_final_fn final;
size_t rc;
uint32_t mark;
bool immutable;
Expand All @@ -22,12 +21,7 @@ object_t* ponyint_objectmap_getobject(objectmap_t* map, void* address, size_t* i
object_t* ponyint_objectmap_getorput(objectmap_t* map, void* address,
uint32_t mark);

object_t* ponyint_objectmap_register_final(objectmap_t* map, void* address,
pony_final_fn final, uint32_t mark);

void ponyint_objectmap_final(objectmap_t* map);

size_t ponyint_objectmap_sweep(objectmap_t* map);
void ponyint_objectmap_sweep(objectmap_t* map);

PONY_EXTERN_C_END

Expand Down
Loading

0 comments on commit e2ae104

Please sign in to comment.