From f84fb5b26f87169c00e4bdc3c4db050772974023 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Thu, 20 Apr 2023 12:52:20 -0400 Subject: [PATCH] Save a couple loads/stores in sweep pages (#49263) We did a load/store on every iteration. Keep a temporary in a register instead. It's a very small difference but it's visible in vtune. --- src/gc-debug.c | 4 ++-- src/gc.c | 31 +++++++++++++++++-------------- src/gc.h | 3 ++- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/gc-debug.c b/src/gc-debug.c index a233b18d7dcfc..2350a21958815 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -563,11 +563,11 @@ JL_NO_ASAN static void gc_scrub_range(char *low, char *high) // Find the age bit char *page_begin = gc_page_data(tag) + GC_PAGE_OFFSET; int obj_id = (((char*)tag) - page_begin) / osize; - uint8_t *ages = pg->ages + obj_id / 8; + uint32_t *ages = pg->ages + obj_id / 32; // Force this to be a young object to save some memory // (especially on 32bit where it's more likely to have pointer-like // bit patterns) - *ages &= ~(1 << (obj_id % 8)); + *ages &= ~(1 << (obj_id % 32)); memset(tag, 0xff, osize); // set mark to GC_MARKED (young and marked) tag->bits.gc = GC_MARKED; diff --git a/src/gc.c b/src/gc.c index e77ee90a34353..b1e29ca149810 100644 --- a/src/gc.c +++ b/src/gc.c @@ -976,8 +976,8 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, page->has_young = 1; char *page_begin = gc_page_data(o) + GC_PAGE_OFFSET; int obj_id = (((char*)o) - page_begin) / page->osize; - uint8_t *ages = page->ages + obj_id / 8; - jl_atomic_fetch_and_relaxed((_Atomic(uint8_t)*)ages, ~(1 << (obj_id % 8))); + uint32_t *ages = page->ages + obj_id / 32; + jl_atomic_fetch_and_relaxed((_Atomic(uint32_t)*)ages, ~(1 << (obj_id % 32))); } } objprofile_count(jl_typeof(jl_valueof(o)), @@ -1406,7 +1406,7 @@ static NOINLINE jl_taggedvalue_t *add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT jl_ptls_t ptls = jl_current_task->ptls; jl_gc_pagemeta_t *pg = jl_gc_alloc_page(); pg->osize = p->osize; - pg->ages = (uint8_t*)malloc_s(GC_PAGE_SZ / 8 / p->osize + 1); + pg->ages = (uint32_t*)malloc_s(LLT_ALIGN(GC_PAGE_SZ / 8 / p->osize + 1, sizeof(uint32_t))); pg->thread_n = ptls->tid; jl_taggedvalue_t *fl = reset_page(ptls, p, pg, NULL); p->newpages = fl; @@ -1506,7 +1506,7 @@ int64_t lazy_freed_pages = 0; static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_taggedvalue_t **pfl, int sweep_full, int osize) JL_NOTSAFEPOINT { char *data = pg->data; - uint8_t *ages = pg->ages; + uint32_t *ages = pg->ages; jl_taggedvalue_t *v = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET); char *lim = (char*)v + GC_PAGE_SZ - GC_PAGE_OFFSET - osize; size_t old_nfree = pg->nfree; @@ -1557,18 +1557,25 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t int16_t prev_nold = 0; int pg_nfree = 0; jl_taggedvalue_t **pfl_begin = NULL; - uint8_t msk = 1; // mask for the age bit in the current age byte + uint32_t msk = 1; // mask for the age bit in the current age byte + uint32_t age = *ages; while ((char*)v <= lim) { + if (!msk) { + msk = 1; + *ages = age; + ages++; + age = *ages; + } int bits = v->bits.gc; if (!gc_marked(bits)) { *pfl = v; pfl = &v->next; pfl_begin = pfl_begin ? pfl_begin : pfl; pg_nfree++; - *ages &= ~msk; + age &= ~msk; } else { // marked young or old - if (*ages & msk || bits == GC_OLD_MARKED) { // old enough + if (age & msk || bits == GC_OLD_MARKED) { // old enough // `!age && bits == GC_OLD_MARKED` is possible for // non-first-class objects like array buffers // (they may get promoted by jl_gc_wb_buf for example, @@ -1584,17 +1591,13 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t has_young = 1; } has_marked |= gc_marked(bits); - *ages |= msk; + age |= msk; freedall = 0; } v = (jl_taggedvalue_t*)((char*)v + osize); msk <<= 1; - if (!msk) { - msk = 1; - ages++; - } } - + *ages = age; assert(!freedall); pg->has_marked = has_marked; pg->has_young = has_young; @@ -4017,7 +4020,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) goto valid_object; // We know now that the age bit reflects liveness status during // the last sweep and that the cell has not been reused since. - if (!(meta->ages[obj_id / 8] & (1 << (obj_id % 8)))) { + if (!(meta->ages[obj_id / 32] & (1 << (obj_id % 32)))) { return NULL; } // Not a freelist entry, therefore a valid object. diff --git a/src/gc.h b/src/gc.h index e0510d9bc3917..3961aeecada8c 100644 --- a/src/gc.h +++ b/src/gc.h @@ -9,6 +9,7 @@ #ifndef JL_GC_H #define JL_GC_H +#include #include #include #include @@ -170,7 +171,7 @@ typedef struct { uint16_t fl_end_offset; // offset of last free object in this page uint16_t thread_n; // thread id of the heap that owns this page char *data; - uint8_t *ages; + uint32_t *ages; } jl_gc_pagemeta_t; // Page layout: