diff --git a/src/libponyc/codegen/codegen.c b/src/libponyc/codegen/codegen.c index e347fd7e2c..3ad4f25a3f 100644 --- a/src/libponyc/codegen/codegen.c +++ b/src/libponyc/codegen/codegen.c @@ -428,11 +428,10 @@ static void init_runtime(compile_t* c) LLVMSetDereferenceableOrNull(value, 0, HEAP_MIN); #endif - // i8* pony_alloc_final(i8*, intptr, c->final_fn) + // i8* pony_alloc_final(i8*, intptr) params[0] = c->void_ptr; params[1] = c->intptr; - params[2] = c->final_fn; - type = LLVMFunctionType(c->void_ptr, params, 3, false); + type = LLVMFunctionType(c->void_ptr, params, 2, false); value = LLVMAddFunction(c->module, "pony_alloc_final", type); #if PONY_LLVM >= 309 LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr); @@ -451,6 +450,50 @@ static void init_runtime(compile_t* c) LLVMSetDereferenceableOrNull(value, 0, HEAP_MIN); #endif + // i8* pony_alloc_small_final(i8*, i32) + params[0] = c->void_ptr; + params[1] = c->i32; + type = LLVMFunctionType(c->void_ptr, params, 2, false); + value = LLVMAddFunction(c->module, "pony_alloc_small_final", type); +#if PONY_LLVM >= 309 + LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr); + LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, + inacc_or_arg_mem_attr); + LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, noalias_attr); + LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, + deref_alloc_small_attr); + LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, align_heap_attr); +#else + LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute); +# if PONY_LLVM >= 308 + LLVMSetInaccessibleMemOrArgMemOnly(value); +# endif + LLVMSetReturnNoAlias(value); + LLVMSetDereferenceable(value, 0, HEAP_MIN); +#endif + + // i8* pony_alloc_large_final(i8*, intptr) + params[0] = c->void_ptr; + params[1] = c->intptr; + type = LLVMFunctionType(c->void_ptr, params, 2, false); + value = LLVMAddFunction(c->module, "pony_alloc_large_final", type); +#if PONY_LLVM >= 309 + 
LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr); + LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, + inacc_or_arg_mem_attr); + LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, noalias_attr); + LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, + deref_alloc_large_attr); + LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, align_heap_attr); +#else + LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute); +# if PONY_LLVM >= 308 + LLVMSetInaccessibleMemOrArgMemOnly(value); +# endif + LLVMSetReturnNoAlias(value); + LLVMSetDereferenceable(value, 0, HEAP_MAX << 1); +#endif + // $message* pony_alloc_msg(i32, i32) params[0] = c->i32; params[1] = c->i32; diff --git a/src/libponyc/codegen/gencall.c b/src/libponyc/codegen/gencall.c index f89f719867..0be2c85f65 100644 --- a/src/libponyc/codegen/gencall.c +++ b/src/libponyc/codegen/gencall.c @@ -1142,21 +1142,20 @@ LLVMValueRef gencall_allocstruct(compile_t* c, reach_type_t* t) if(size == 0) size = 1; - if(t->final_fn == NULL) + if(size <= HEAP_MAX) { - if(size <= HEAP_MAX) - { - uint32_t index = ponyint_heap_index(size); - args[1] = LLVMConstInt(c->i32, index, false); + uint32_t index = ponyint_heap_index(size); + args[1] = LLVMConstInt(c->i32, index, false); + if(t->final_fn == NULL) result = gencall_runtime(c, "pony_alloc_small", args, 2, ""); - } else { - args[1] = LLVMConstInt(c->intptr, size, false); - result = gencall_runtime(c, "pony_alloc_large", args, 2, ""); - } + else + result = gencall_runtime(c, "pony_alloc_small_final", args, 2, ""); } else { args[1] = LLVMConstInt(c->intptr, size, false); - args[2] = LLVMConstBitCast(t->final_fn, c->final_fn); - result = gencall_runtime(c, "pony_alloc_final", args, 3, ""); + if(t->final_fn == NULL) + result = gencall_runtime(c, "pony_alloc_large", args, 2, ""); + else + result = gencall_runtime(c, "pony_alloc_large_final", args, 2, ""); } result = LLVMBuildBitCast(c->builder, result, t->structure_ptr, ""); diff --git 
a/src/libponyrt/actor/actor.c b/src/libponyrt/actor/actor.c
index ba845ecbdd..4b4775a0fd 100644
--- a/src/libponyrt/actor/actor.c
+++ b/src/libponyrt/actor/actor.c
@@ -261,7 +261,7 @@ void ponyint_actor_final(pony_ctx_t* ctx, pony_actor_t* actor)
     actor->type->final(actor);
 
   // Run all outstanding object finalisers.
-  ponyint_gc_final(ctx, &actor->gc);
+  ponyint_heap_final(&actor->heap);
 
   // Restore the current actor.
   ctx->current = prev;
@@ -406,14 +406,30 @@ PONY_API void* pony_realloc(pony_ctx_t* ctx, void* p, size_t size)
   return ponyint_heap_realloc(ctx->current, &ctx->current->heap, p, size);
 }
 
-PONY_API void* pony_alloc_final(pony_ctx_t* ctx, size_t size,
-  pony_final_fn final)
+// Allocate `size` bytes for a finalisable object on the current actor's heap.
+PONY_API void* pony_alloc_final(pony_ctx_t* ctx, size_t size)
+{
+  DTRACE2(HEAP_ALLOC, (uintptr_t)ctx->scheduler, size);
+
+  return ponyint_heap_alloc_final(ctx->current, &ctx->current->heap, size);
+}
+
+// As pony_alloc_final, but the caller supplies the size class index.
+PONY_API void* pony_alloc_small_final(pony_ctx_t* ctx, uint32_t sizeclass)
+{
+  DTRACE2(HEAP_ALLOC, (uintptr_t)ctx->scheduler, HEAP_MIN << sizeclass);
+
+  return ponyint_heap_alloc_small_final(ctx->current, &ctx->current->heap,
+    sizeclass);
+}
+
+// As pony_alloc_final, for sizes known to be larger than HEAP_MAX.
+PONY_API void* pony_alloc_large_final(pony_ctx_t* ctx, size_t size)
 {
   DTRACE2(HEAP_ALLOC, (uintptr_t)ctx->scheduler, size);
 
-  void* p = ponyint_heap_alloc(ctx->current, &ctx->current->heap, size);
-  ponyint_gc_register_final(ctx, p, final);
-  return p;
+  return ponyint_heap_alloc_large_final(ctx->current, &ctx->current->heap,
+    size);
 }
 
 PONY_API void pony_triggergc(pony_actor_t* actor)
diff --git a/src/libponyrt/gc/gc.c b/src/libponyrt/gc/gc.c
index 0d92322781..6aa09e95c6 100644
--- a/src/libponyrt/gc/gc.c
+++ b/src/libponyrt/gc/gc.c
@@ -619,7 +619,7 @@ void ponyint_gc_discardstack(pony_ctx_t* ctx)
 
 void ponyint_gc_sweep(pony_ctx_t* ctx, gc_t* gc)
 {
-  gc->finalisers -= ponyint_objectmap_sweep(&gc->local);
+  ponyint_objectmap_sweep(&gc->local);
   gc->delta = ponyint_actormap_sweep(ctx, &gc->foreign, gc->mark, gc->delta);
 }
 
@@ -718,45 +718,6 @@ void
ponyint_gc_sendrelease_manual(pony_ctx_t* ctx) pony_assert(ponyint_actormap_size(&ctx->acquire) == 0); } -void ponyint_gc_register_final(pony_ctx_t* ctx, void* p, pony_final_fn final) -{ - if(!ctx->finalising) - { - // If we aren't finalising an actor, register the finaliser. - gc_t* gc = ponyint_actor_gc(ctx->current); - ponyint_objectmap_register_final(&gc->local, p, final, gc->mark); - gc->finalisers++; - } else { - // Otherwise, put the finaliser on the gc stack. - recurse(ctx, p, final); - } -} - -void ponyint_gc_final(pony_ctx_t* ctx, gc_t* gc) -{ - if(gc->finalisers == 0) - return; - - // Set the finalising flag. - ctx->finalising = true; - - // Run all finalisers in the object map. - ponyint_objectmap_final(&gc->local); - - // Finalise any objects that were created during finalisation. - pony_final_fn f; - void *p; - - while(ctx->stack != NULL) - { - ctx->stack = ponyint_gcstack_pop(ctx->stack, (void**)&f); - ctx->stack = ponyint_gcstack_pop(ctx->stack, &p); - f(p); - } - - ctx->finalising = false; -} - void ponyint_gc_done(gc_t* gc) { gc->mark++; diff --git a/src/libponyrt/gc/gc.h b/src/libponyrt/gc/gc.h index 5f3f8e72d4..060d1e873f 100644 --- a/src/libponyrt/gc/gc.h +++ b/src/libponyrt/gc/gc.h @@ -18,7 +18,6 @@ typedef struct gc_t uint32_t mark; uint32_t rc_mark; size_t rc; - size_t finalisers; objectmap_t local; actormap_t foreign; deltamap_t* delta; @@ -75,10 +74,6 @@ size_t ponyint_gc_rc(gc_t* gc); deltamap_t* ponyint_gc_delta(gc_t* gc); -void ponyint_gc_register_final(pony_ctx_t* ctx, void* p, pony_final_fn final); - -void ponyint_gc_final(pony_ctx_t* ctx, gc_t* gc); - void ponyint_gc_done(gc_t* gc); void ponyint_gc_destroy(gc_t* gc); diff --git a/src/libponyrt/gc/objectmap.c b/src/libponyrt/gc/objectmap.c index 7977a30fee..b162967ab7 100644 --- a/src/libponyrt/gc/objectmap.c +++ b/src/libponyrt/gc/objectmap.c @@ -20,7 +20,6 @@ static object_t* object_alloc(void* address, uint32_t mark) { object_t* obj = (object_t*)POOL_ALLOC(object_t); obj->address = 
address; - obj->final = NULL; obj->rc = 0; obj->immutable = false; @@ -59,34 +58,12 @@ object_t* ponyint_objectmap_getorput(objectmap_t* map, void* address, return obj; } -object_t* ponyint_objectmap_register_final(objectmap_t* map, void* address, - pony_final_fn final, uint32_t mark) +void ponyint_objectmap_sweep(objectmap_t* map) { - object_t* obj = ponyint_objectmap_getorput(map, address, mark); - obj->final = final; - return obj; -} - -void ponyint_objectmap_final(objectmap_t* map) -{ - size_t i = HASHMAP_BEGIN; - object_t* obj; - - while((obj = ponyint_objectmap_next(map, &i)) != NULL) - { - if(obj->final != NULL) - obj->final(obj->address); - } -} - -size_t ponyint_objectmap_sweep(objectmap_t* map) -{ - size_t count = 0; size_t i = HASHMAP_BEGIN; object_t* obj; bool needs_optimize = false; - while((obj = ponyint_objectmap_next(map, &i)) != NULL) { void* p = obj->address; @@ -96,19 +73,6 @@ size_t ponyint_objectmap_sweep(objectmap_t* map) chunk_t* chunk = (chunk_t*)ponyint_pagemap_get(p); ponyint_heap_mark_shallow(chunk, p); } else { - if(obj->final != NULL) - { - // If we are not free in the heap, don't run the finaliser and don't - // remove this entry from the object map. 
-        chunk_t* chunk = (chunk_t*)ponyint_pagemap_get(p);
-
-        if(ponyint_heap_ismarked(chunk, p))
-          continue;
-
-        obj->final(p);
-        count++;
-      }
-
       ponyint_objectmap_clearindex(map, i);
       needs_optimize = true;
 
@@ -118,6 +82,4 @@ size_t ponyint_objectmap_sweep(objectmap_t* map)
 
   if(needs_optimize)
     ponyint_objectmap_optimize(map);
-
-  return count;
 }
diff --git a/src/libponyrt/gc/objectmap.h b/src/libponyrt/gc/objectmap.h
index 96082e9de6..0eeedb1d74 100644
--- a/src/libponyrt/gc/objectmap.h
+++ b/src/libponyrt/gc/objectmap.h
@@ -9,7 +9,6 @@ PONY_EXTERN_C_BEGIN
 typedef struct object_t
 {
   void* address;
-  pony_final_fn final;
   size_t rc;
   uint32_t mark;
   bool immutable;
@@ -22,12 +21,7 @@ object_t* ponyint_objectmap_getobject(objectmap_t* map, void* address, size_t* i
 object_t* ponyint_objectmap_getorput(objectmap_t* map, void* address,
   uint32_t mark);
 
-object_t* ponyint_objectmap_register_final(objectmap_t* map, void* address,
-  pony_final_fn final, uint32_t mark);
-
-void ponyint_objectmap_final(objectmap_t* map);
-
-size_t ponyint_objectmap_sweep(objectmap_t* map);
+void ponyint_objectmap_sweep(objectmap_t* map);
 
 PONY_EXTERN_C_END
 
diff --git a/src/libponyrt/mem/heap.c b/src/libponyrt/mem/heap.c
index 5442b6ad60..fd3b2f4d11 100644
--- a/src/libponyrt/mem/heap.c
+++ b/src/libponyrt/mem/heap.c
@@ -17,6 +17,7 @@ typedef struct chunk_t
   // mutable
   uint32_t slots;
   uint32_t shallow;
+  uint32_t finalisers;
 
   struct chunk_t* next;
 } chunk_t;
@@ -71,9 +72,77 @@ static void clear_chunk(chunk_t* chunk, uint32_t mark)
   chunk->shallow = mark;
 }
 
+// Run every pending finaliser in a small chunk, including the one for slot 0.
+// `mark` is unused; it is present so this can be passed to chunk_list().
+static void final_small(chunk_t* chunk, uint32_t mark)
+{
+  // Walk a local copy of the bitmap: each set bit is a slot whose object
+  // still has a finaliser to run.
+  uint32_t finalisers = chunk->finalisers;
+
+  while(finalisers != 0)
+  {
+    // A result of 0 from ctz means "slot 0", not "nothing left", so it must
+    // not be used as the loop condition.
+    uint32_t bit = __pony_ctz(finalisers);
+    void* p = chunk->m + (bit << HEAP_MINBITS);
+
+    // The finaliser comes from the pony_type_t* read from the start of p.
+    pony_assert((*(pony_type_t**)p)->final != NULL);
+    (*(pony_type_t**)p)->final(p);
+
+    // Clear the bit in the chunk (unsigned shift, bit can be 31) and drop
+    // the lowest set bit from the local copy.
+    chunk->finalisers &= ~((uint32_t)1 << bit);
+    finalisers &= (finalisers - 1);
+  }
+  (void)mark;
+}
+
+// Run finalisers only for slots that the sweep has just freed. Slots that are
+// still in use keep their finaliser bit so it can run later.
+static void final_small_freed(chunk_t* chunk)
+{
+  // A set bit in `slots` marks a free slot, so the intersection is the set
+  // of freed objects whose finaliser has not run yet.
+  uint32_t finalisers = chunk->finalisers & chunk->slots;
+
+  // Clear them up front so that in-use slots can never stall the loop.
+  chunk->finalisers &= ~chunk->slots;
+
+  while(finalisers != 0)
+  {
+    uint32_t bit = __pony_ctz(finalisers);
+    void* p = chunk->m + (bit << HEAP_MINBITS);
+
+    // run finaliser
+    pony_assert((*(pony_type_t**)p)->final != NULL);
+    (*(pony_type_t**)p)->final(p);
+
+    // clear bit just found in our local finaliser map
+    finalisers &= (finalisers - 1);
+  }
+}
+
+// Run the finaliser of a large chunk's object if one is pending.
+static void final_large(chunk_t* chunk, uint32_t mark)
+{
+  if(chunk->finalisers == 1)
+  {
+    // run finaliser
+    (*(pony_type_t**)chunk->m)->final(chunk->m);
+    chunk->finalisers = 0;
+  }
+  (void)mark;
+}
+
 static void destroy_small(chunk_t* chunk, uint32_t mark)
 {
   (void)mark;
+
+  // run any finalisers that need running
+  final_small(chunk, mark);
+
   ponyint_pagemap_set(chunk->m, NULL);
   POOL_FREE(block_t, chunk->m);
   POOL_FREE(chunk_t, chunk);
@@ -81,7 +150,12 @@ static void destroy_small(chunk_t* chunk, uint32_t mark)
 
 static void destroy_large(chunk_t* chunk, uint32_t mark)
 {
+
   (void)mark;
+
+  // run any finalisers that need running
+  final_large(chunk, mark);
+
   large_pagemap(chunk->m, chunk->size, NULL);
 
   if(chunk->m != NULL)
@@ -113,6 +187,9 @@ static size_t sweep_small(chunk_t* chunk, chunk_t** avail, chunk_t** full,
         (__pony_popcount(chunk->slots) * size);
       chunk->next = *avail;
       *avail = chunk;
+
+      // run finalisers for freed slots
+      final_small_freed(chunk);
     }
 
     chunk = next;
@@ -196,6 +273,17 @@ void ponyint_heap_destroy(heap_t* heap)
   }
 }
 
+void ponyint_heap_final(heap_t*
heap)
+{
+  chunk_list(final_large, heap->large, 0);
+
+  for(int i = 0; i < HEAP_SIZECLASSES; i++)
+  {
+    chunk_list(final_small, heap->small_free[i], 0);
+    chunk_list(final_small, heap->small_full[i], 0);
+  }
+}
+
 void* ponyint_heap_alloc(pony_actor_t* actor, heap_t* heap, size_t size)
 {
   if(size == 0)
@@ -208,6 +296,21 @@ void* ponyint_heap_alloc(pony_actor_t* actor, heap_t* heap, size_t size)
   }
 }
 
+// Allocate `size` bytes for an object that has a finaliser. Returns NULL for
+// a size of 0; otherwise HEAP_MAX decides between the small and large path.
+void* ponyint_heap_alloc_final(pony_actor_t* actor, heap_t* heap, size_t size)
+{
+  if(size == 0)
+  {
+    return NULL;
+  } else if(size <= HEAP_MAX) {
+    return ponyint_heap_alloc_small_final(actor, heap,
+      ponyint_heap_index(size));
+  } else {
+    return ponyint_heap_alloc_large_final(actor, heap, size);
+  }
+}
+
 void* ponyint_heap_alloc_small(pony_actor_t* actor, heap_t* heap,
   uint32_t sizeclass)
 {
@@ -237,6 +340,63 @@ void* ponyint_heap_alloc_small(pony_actor_t* actor, heap_t* heap,
     n->m = (char*) POOL_ALLOC(block_t);
     n->size = sizeclass;
 
+    // note that no finaliser needs to run
+    n->finalisers = 0;
+
+    // Clear the first bit.
+    n->shallow = n->slots = sizeclass_init[sizeclass];
+    n->next = NULL;
+
+    ponyint_pagemap_set(n->m, n);
+
+    heap->small_free[sizeclass] = n;
+    chunk = n;
+
+    // Use the first slot.
+    m = chunk->m;
+  }
+
+  heap->used += SIZECLASS_SIZE(sizeclass);
+  return m;
+}
+
+// Allocate one slot of the given size class and set that slot's bit in
+// chunk->finalisers to record that a finaliser has to run for it.
+void* ponyint_heap_alloc_small_final(pony_actor_t* actor, heap_t* heap,
+  uint32_t sizeclass)
+{
+  chunk_t* chunk = heap->small_free[sizeclass];
+  void* m;
+
+  // If there are none in this size class, get a new one.
+  if(chunk != NULL)
+  {
+    // Clear and use the first available slot.
+    uint32_t slots = chunk->slots;
+    uint32_t bit = __pony_ctz(slots);
+    slots &= ~((uint32_t)1 << bit);
+
+    m = chunk->m + (bit << HEAP_MINBITS);
+    chunk->slots = slots;
+
+    // note that a finaliser needs to run
+    chunk->finalisers |= ((uint32_t)1 << bit);
+
+    if(slots == 0)
+    {
+      heap->small_free[sizeclass] = chunk->next;
+      chunk->next = heap->small_full[sizeclass];
+      heap->small_full[sizeclass] = chunk;
+    }
+  } else {
+    chunk_t* n = (chunk_t*) POOL_ALLOC(chunk_t);
+    n->actor = actor;
+    n->m = (char*) POOL_ALLOC(block_t);
+    n->size = sizeclass;
+
+    // note that a finaliser needs to run
+    n->finalisers = 1;
+
     // Clear the first bit.
     n->shallow = n->slots = sizeclass_init[sizeclass];
     n->next = NULL;
@@ -265,6 +425,34 @@ void* ponyint_heap_alloc_large(pony_actor_t* actor, heap_t* heap, size_t size)
   chunk->slots = 0;
   chunk->shallow = 0;
 
+  // note that no finaliser needs to run
+  chunk->finalisers = 0;
+
+  large_pagemap(chunk->m, size, chunk);
+
+  chunk->next = heap->large;
+  heap->large = chunk;
+  heap->used += chunk->size;
+
+  return chunk->m;
+}
+
+// Allocate a large chunk and set chunk->finalisers to 1 for its object.
+void* ponyint_heap_alloc_large_final(pony_actor_t* actor, heap_t* heap,
+  size_t size)
+{
+  size = ponyint_pool_adjust_size(size);
+
+  chunk_t* chunk = (chunk_t*) POOL_ALLOC(chunk_t);
+  chunk->actor = actor;
+  chunk->size = size;
+  chunk->m = (char*) ponyint_pool_alloc_size(size);
+  chunk->slots = 0;
+  chunk->shallow = 0;
+
+  // note that a finaliser needs to run
+  chunk->finalisers = 1;
+
   large_pagemap(chunk->m, size, chunk);
 
   chunk->next = heap->large;
@@ -429,6 +617,9 @@ void ponyint_heap_free(chunk_t* chunk, void* p)
   {
     if(p == chunk->m)
     {
+      // run finaliser if needed
+      final_large(chunk, 0);
+
       ponyint_pool_free_size(chunk->size, chunk->m);
       chunk->m = NULL;
       chunk->slots = 1;
@@ -444,6 +635,17 @@ void ponyint_heap_free(chunk_t* chunk, void* p)
     // Shift to account for smallest allocation size.
uint32_t slot = FIND_SLOT(ext, chunk->m); + // check if there's a finaliser to run + if((chunk->finalisers & slot) != 0) + { + // run finaliser + (*(pony_type_t**)p)->final(p); + + // clear finaliser + chunk->finalisers &= ~slot; + } + + // free slot chunk->slots |= slot; } } diff --git a/src/libponyrt/mem/heap.h b/src/libponyrt/mem/heap.h index f818c05032..783b6a2cef 100644 --- a/src/libponyrt/mem/heap.h +++ b/src/libponyrt/mem/heap.h @@ -37,6 +37,8 @@ void ponyint_heap_init(heap_t* heap); void ponyint_heap_destroy(heap_t* heap); +void ponyint_heap_final(heap_t* heap); + __pony_spec_malloc__( void* ponyint_heap_alloc(pony_actor_t* actor, heap_t* heap, size_t size) ); @@ -53,6 +55,20 @@ void* ponyint_heap_alloc_large(pony_actor_t* actor, heap_t* heap, size_t size) void* ponyint_heap_realloc(pony_actor_t* actor, heap_t* heap, void* p, size_t size); +__pony_spec_malloc__( + void* ponyint_heap_alloc_final(pony_actor_t* actor, heap_t* heap, size_t size) + ); + +__pony_spec_malloc__( +void* ponyint_heap_alloc_small_final(pony_actor_t* actor, heap_t* heap, + uint32_t sizeclass) + ); + +__pony_spec_malloc__( +void* ponyint_heap_alloc_large_final(pony_actor_t* actor, heap_t* heap, + size_t size) + ); + /** * Adds to the used memory figure kept by the heap. This allows objects * received in messages to count towards the GC heuristic. diff --git a/src/libponyrt/pony.h b/src/libponyrt/pony.h index 0ae47a3189..580fbb9dd1 100644 --- a/src/libponyrt/pony.h +++ b/src/libponyrt/pony.h @@ -209,8 +209,14 @@ PONY_API ATTRIBUTE_MALLOC void* pony_realloc(pony_ctx_t* ctx, void* p, size_t si * Attach a finaliser that will be run on memory when it is collected. Such * memory cannot be safely realloc'd. */ -PONY_API ATTRIBUTE_MALLOC void* pony_alloc_final(pony_ctx_t* ctx, size_t size, - pony_final_fn final); +PONY_API ATTRIBUTE_MALLOC void* pony_alloc_final(pony_ctx_t* ctx, size_t size); + +/// Allocate using a HEAP_INDEX instead of a size in bytes. 
+PONY_API ATTRIBUTE_MALLOC void* pony_alloc_small_final(pony_ctx_t* ctx, + uint32_t sizeclass); + +/// Allocate when we know it's larger than HEAP_MAX. +PONY_API ATTRIBUTE_MALLOC void* pony_alloc_large_final(pony_ctx_t* ctx, size_t size); /// Trigger GC next time the current actor is scheduled PONY_API void pony_triggergc(pony_actor_t* actor); diff --git a/src/libponyrt/sched/scheduler.h b/src/libponyrt/sched/scheduler.h index 103b91134a..1f4c972019 100644 --- a/src/libponyrt/sched/scheduler.h +++ b/src/libponyrt/sched/scheduler.h @@ -28,7 +28,6 @@ typedef struct pony_ctx_t trace_actor_fn trace_actor; gcstack_t* stack; actormap_t acquire; - bool finalising; void* serialise_buffer; size_t serialise_size;