diff --git a/src/gc-debug.c b/src/gc-debug.c
index 02addaa98e44c8..2a25b02affd42e 100644
--- a/src/gc-debug.c
+++ b/src/gc-debug.c
@@ -27,19 +27,16 @@ jl_gc_pagemeta_t *jl_gc_page_metadata(void *data)
 // the end of the page.
 JL_DLLEXPORT jl_taggedvalue_t *jl_gc_find_taggedvalue_pool(char *p, size_t *osize_p)
 {
-    if (!page_metadata(p))
+    if (!gc_alloc_map_is_set(p))
         // Not in the pool
         return NULL;
-    struct jl_gc_metadata_ext info = page_metadata_ext(p);
+    jl_gc_pagemeta_t *meta = page_metadata(p);
     char *page_begin = gc_page_data(p) + GC_PAGE_OFFSET;
     // In the page header
     if (p < page_begin)
         return NULL;
     size_t ofs = p - page_begin;
-    // Check if this is a free page
-    if (!(info.pagetable0->allocmap[info.pagetable0_i32] & (uint32_t)(1 << info.pagetable0_i)))
-        return NULL;
-    int osize = info.meta->osize;
+    int osize = meta->osize;
     // Shouldn't be needed, just in case
     if (osize == 0)
         return NULL;
@@ -111,44 +108,14 @@ static void gc_clear_mark_page(jl_gc_pagemeta_t *pg, int bits)
     }
 }
 
-static void gc_clear_mark_pagetable0(pagetable0_t *pagetable0, int bits)
-{
-    for (int pg_i = 0; pg_i < REGION0_PG_COUNT / 32; pg_i++) {
-        uint32_t line = pagetable0->allocmap[pg_i];
-        if (line) {
-            for (int j = 0; j < 32; j++) {
-                if ((line >> j) & 1) {
-                    gc_clear_mark_page(pagetable0->meta[pg_i * 32 + j], bits);
-                }
-            }
-        }
-    }
-}
-
-static void gc_clear_mark_pagetable1(pagetable1_t *pagetable1, int bits)
-{
-    for (int pg_i = 0; pg_i < REGION1_PG_COUNT / 32; pg_i++) {
-        uint32_t line = pagetable1->allocmap0[pg_i];
-        if (line) {
-            for (int j = 0; j < 32; j++) {
-                if ((line >> j) & 1) {
-                    gc_clear_mark_pagetable0(pagetable1->meta0[pg_i * 32 + j], bits);
-                }
-            }
-        }
-    }
-}
-
-static void gc_clear_mark_pagetable(int bits)
+static void gc_clear_mark_outer(int bits)
 {
-    for (int pg_i = 0; pg_i < (REGION2_PG_COUNT + 31) / 32; pg_i++) {
-        uint32_t line = memory_map.allocmap1[pg_i];
-        if (line) {
-            for (int j = 0; j < 32; j++) {
-                if ((line >> j) & 1) {
-                    gc_clear_mark_pagetable1(memory_map.meta1[pg_i * 32 + j], bits);
-                }
-            }
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        jl_gc_pagemeta_t *pg = ptls2->page_metadata_back;
+        while (pg != NULL) {
+            gc_clear_mark_page(pg, bits);
+            pg = pg->next;
         }
     }
 }
@@ -184,7 +151,7 @@ static void clear_mark(int bits)
         v = v->next;
     }
 
-    gc_clear_mark_pagetable(bits);
+    gc_clear_mark_outer(bits);
 }
 
 static void restore(void)
@@ -561,7 +528,6 @@ void gc_scrub_record_task(jl_task_t *t)
 
 JL_NO_ASAN static void gc_scrub_range(char *low, char *high)
 {
-    jl_ptls_t ptls = jl_current_task->ptls;
     jl_jmp_buf *old_buf = jl_get_safe_restore();
     jl_jmp_buf buf;
     if (jl_setjmp(buf, 0)) {
@@ -1168,7 +1134,7 @@ void gc_stats_big_obj(void)
 static int64_t poolobj_sizes[4];
 static int64_t empty_pages;
 
-static void gc_count_pool_page(jl_gc_pagemeta_t *pg)
+static void gc_count_pool_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
 {
     int osize = pg->osize;
     char *data = pg->data;
@@ -1187,44 +1153,16 @@ static void gc_count_pool_page(jl_gc_pagemeta_t *pg)
     }
 }
 
-static void gc_count_pool_pagetable0(pagetable0_t *pagetable0)
-{
-    for (int pg_i = 0; pg_i < REGION0_PG_COUNT / 32; pg_i++) {
-        uint32_t line = pagetable0->allocmap[pg_i];
-        if (line) {
-            for (int j = 0; j < 32; j++) {
-                if ((line >> j) & 1) {
-                    gc_count_pool_page(pagetable0->meta[pg_i * 32 + j]);
-                }
-            }
-        }
-    }
-}
-
-static void gc_count_pool_pagetable1(pagetable1_t *pagetable1)
-{
-    for (int pg_i = 0; pg_i < REGION1_PG_COUNT / 32; pg_i++) {
-        uint32_t line = pagetable1->allocmap0[pg_i];
-        if (line) {
-            for (int j = 0; j < 32; j++) {
-                if ((line >> j) & 1) {
-                    gc_count_pool_pagetable0(pagetable1->meta0[pg_i * 32 + j]);
-                }
-            }
-        }
-    }
-}
-
 static void gc_count_pool_pagetable(void)
 {
-    for (int pg_i = 0; pg_i < (REGION2_PG_COUNT + 31) / 32; pg_i++) {
-        uint32_t line = memory_map.allocmap1[pg_i];
-        if (line) {
-            for (int j = 0; j < 32; j++) {
-                if ((line >> j) & 1) {
-                    gc_count_pool_pagetable1(memory_map.meta1[pg_i * 32 + j]);
-                }
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        jl_gc_pagemeta_t *pg = ptls2->page_metadata_back;
+        while (pg != NULL) {
+            if (gc_alloc_map_is_set(pg->data)) {
+                gc_count_pool_page(pg);
             }
+            pg = pg->next;
         }
     }
 }
diff --git a/src/gc-pages.c b/src/gc-pages.c
index 28daa9d67a9edb..91af6097c5363e 100644
--- a/src/gc-pages.c
+++ b/src/gc-pages.c
@@ -19,7 +19,6 @@ extern "C" {
 #define MIN_BLOCK_PG_ALLOC (1) // 16 KB
 
 static int block_pg_cnt = DEFAULT_BLOCK_PG_ALLOC;
-static size_t current_pg_count = 0;
 
 void jl_gc_init_page(void)
 {
@@ -33,9 +32,9 @@ void jl_gc_init_page(void)
 
 // Try to allocate a memory block for multiple pages
 // Return `NULL` if allocation failed. Result is aligned to `GC_PAGE_SZ`.
-static char *jl_gc_try_alloc_pages(int pg_cnt) JL_NOTSAFEPOINT
+char *jl_gc_try_alloc_pages(void) JL_NOTSAFEPOINT
 {
-    size_t pages_sz = GC_PAGE_SZ * pg_cnt;
+    size_t pages_sz = GC_PAGE_SZ * block_pg_cnt;
 #ifdef _OS_WINDOWS_
     char *mem = (char*)VirtualAlloc(NULL, pages_sz + GC_PAGE_SZ,
                                     MEM_RESERVE, PAGE_READWRITE);
@@ -56,231 +55,66 @@ static char *jl_gc_try_alloc_pages(int pg_cnt) JL_NOTSAFEPOINT
     return mem;
 }
 
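// Aside (illustration only, not part of the patch): `jl_gc_try_alloc_pages`
// reserves `pages_sz + GC_PAGE_SZ` bytes so that a GC_PAGE_SZ-aligned run of
// `pages_sz` bytes is guaranteed to fit inside the mapping. A minimal sketch
// of that alignment step, assuming only the GC_PAGE_SZ definition from gc.h
// and uintptr_t from <stdint.h> (`align_to_gc_page` itself is hypothetical):
static inline char *align_to_gc_page(char *mem)
{
    uintptr_t p = (uintptr_t)mem;
    // round up to the next GC_PAGE_SZ boundary; GC_PAGE_SZ is a power of two
    return (char*)((p + GC_PAGE_SZ - 1) & ~(uintptr_t)(GC_PAGE_SZ - 1));
}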
-// Allocate the memory for a new page. Starts with `block_pg_cnt` number
-// of pages. Decrease 4x every time so that there are enough space for a few.
-// more chunks (or other allocations). The final page count is recorded
-// and will be used as the starting count next time. If the page count is
-// smaller `MIN_BLOCK_PG_ALLOC` a `jl_memory_exception` is thrown.
-// Assumes `gc_perm_lock` is acquired, the lock is released before the
-// exception is thrown.
-static jl_gc_pagemeta_t *jl_gc_alloc_new_page(void) JL_NOTSAFEPOINT
-{
-    // try to allocate a large block of memory (or a small one)
-    unsigned pg, pg_cnt = block_pg_cnt;
-    char *mem = NULL;
-    while (1) {
-        if (__likely((mem = jl_gc_try_alloc_pages(pg_cnt))))
-            break;
-        size_t min_block_pg_alloc = MIN_BLOCK_PG_ALLOC;
-        if (GC_PAGE_SZ * min_block_pg_alloc < jl_page_size)
-            min_block_pg_alloc = jl_page_size / GC_PAGE_SZ; // exact division
-        if (pg_cnt >= 4 * min_block_pg_alloc) {
-            pg_cnt /= 4;
-            block_pg_cnt = pg_cnt;
-        }
-        else if (pg_cnt > min_block_pg_alloc) {
-            block_pg_cnt = pg_cnt = min_block_pg_alloc;
-        }
-        else {
-            uv_mutex_unlock(&gc_perm_lock);
-            jl_throw(jl_memory_exception);
-        }
-    }
-
-    // now need to insert these pages into the pagetable metadata
-    // if any allocation fails, this just stops recording more pages from that point
-    // and will free (munmap) the remainder
-    jl_gc_pagemeta_t *page_meta =
-        (jl_gc_pagemeta_t*)jl_gc_perm_alloc_nolock(pg_cnt * sizeof(jl_gc_pagemeta_t), 1,
-                                                   sizeof(void*), 0);
-    pg = 0;
-    if (page_meta) {
-        for (; pg < pg_cnt; pg++) {
-            struct jl_gc_metadata_ext info;
-            uint32_t msk;
-            unsigned i;
-            pagetable1_t **ppagetable1;
-            pagetable0_t **ppagetable0;
-            jl_gc_pagemeta_t **pmeta;
-
-            char *ptr = mem + (GC_PAGE_SZ * pg);
-            page_meta[pg].data = ptr;
-
-            // create & store the level 2 / outermost info
-            i = REGION_INDEX(ptr);
-            info.pagetable_i = i % 32;
-            info.pagetable_i32 = i / 32;
-            msk = (1u << info.pagetable_i);
-            if ((memory_map.freemap1[info.pagetable_i32] & msk) == 0)
-                memory_map.freemap1[info.pagetable_i32] |= msk; // has free
-            info.pagetable1 = *(ppagetable1 = &memory_map.meta1[i]);
-            if (!info.pagetable1) {
-                info.pagetable1 = (pagetable1_t*)jl_gc_perm_alloc_nolock(sizeof(pagetable1_t), 1,
-                                                                         sizeof(void*), 0);
-                *ppagetable1 = info.pagetable1;
-                if (!info.pagetable1)
-                    break;
-            }
-
-            // create & store the level 1 info
-            i = REGION1_INDEX(ptr);
-            info.pagetable1_i = i % 32;
-            info.pagetable1_i32 = i / 32;
-            msk = (1u << info.pagetable1_i);
-            if ((info.pagetable1->freemap0[info.pagetable1_i32] & msk) == 0)
-                info.pagetable1->freemap0[info.pagetable1_i32] |= msk; // has free
-            info.pagetable0 = *(ppagetable0 = &info.pagetable1->meta0[i]);
-            if (!info.pagetable0) {
-                info.pagetable0 = (pagetable0_t*)jl_gc_perm_alloc_nolock(sizeof(pagetable0_t), 1,
-                                                                         sizeof(void*), 0);
-                *ppagetable0 = info.pagetable0;
-                if (!info.pagetable0)
-                    break;
-            }
-
-            // create & store the level 0 / page info
-            i = REGION0_INDEX(ptr);
-            info.pagetable0_i = i % 32;
-            info.pagetable0_i32 = i / 32;
-            msk = (1u << info.pagetable0_i);
-            info.pagetable0->freemap[info.pagetable0_i32] |= msk; // is free
-            pmeta = &info.pagetable0->meta[i];
-            info.meta = (*pmeta = &page_meta[pg]);
-        }
-    }
-
-    if (pg < pg_cnt) {
-#ifndef _OS_WINDOWS_
-        // Trim the allocation to only cover the region
-        // that we successfully created the metadata for.
-        // This is not supported by the Windows kernel,
-        // so we have to just skip it there and just lose these virtual addresses.
-        munmap(mem + LLT_ALIGN(GC_PAGE_SZ * pg, jl_page_size),
-               GC_PAGE_SZ * pg_cnt - LLT_ALIGN(GC_PAGE_SZ * pg, jl_page_size));
-#endif
-        if (pg == 0) {
-            uv_mutex_unlock(&gc_perm_lock);
-            jl_throw(jl_memory_exception);
-        }
-    }
-    return page_meta;
-}
-
 // get a new page, either from the freemap
 // or from the kernel if none are available
 NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
 {
-    struct jl_gc_metadata_ext info;
-    uv_mutex_lock(&gc_perm_lock);
-
     int last_errno = errno;
 #ifdef _OS_WINDOWS_
     DWORD last_error = GetLastError();
 #endif
-    // scan over memory_map page-table for existing allocated but unused pages
-    for (info.pagetable_i32 = memory_map.lb; info.pagetable_i32 < (REGION2_PG_COUNT + 31) / 32; info.pagetable_i32++) {
-        uint32_t freemap1 = memory_map.freemap1[info.pagetable_i32];
-        for (info.pagetable_i = 0; freemap1; info.pagetable_i++, freemap1 >>= 1) {
-            unsigned next = ffs_u32(freemap1);
-            info.pagetable_i += next;
-            freemap1 >>= next;
-            info.pagetable1 = memory_map.meta1[info.pagetable_i + info.pagetable_i32 * 32];
-            // repeat over page-table level 1
-            for (info.pagetable1_i32 = info.pagetable1->lb; info.pagetable1_i32 < REGION1_PG_COUNT / 32; info.pagetable1_i32++) {
-                uint32_t freemap0 = info.pagetable1->freemap0[info.pagetable1_i32];
-                for (info.pagetable1_i = 0; freemap0; info.pagetable1_i++, freemap0 >>= 1) {
-                    unsigned next = ffs_u32(freemap0);
-                    info.pagetable1_i += next;
-                    freemap0 >>= next;
-                    info.pagetable0 = info.pagetable1->meta0[info.pagetable1_i + info.pagetable1_i32 * 32];
-                    // repeat over page-table level 0
-                    for (info.pagetable0_i32 = info.pagetable0->lb; info.pagetable0_i32 < REGION0_PG_COUNT / 32; info.pagetable0_i32++) {
-                        uint32_t freemap = info.pagetable0->freemap[info.pagetable0_i32];
-                        if (freemap) {
-                            info.pagetable0_i = ffs_u32(freemap);
-                            info.meta = info.pagetable0->meta[info.pagetable0_i + info.pagetable0_i32 * 32];
-                            assert(info.meta->data);
-                            // new pages available starting at min of lb and pagetable_i32
-                            if (memory_map.lb < info.pagetable_i32)
-                                memory_map.lb = info.pagetable_i32;
-                            if (info.pagetable1->lb < info.pagetable1_i32)
-                                info.pagetable1->lb = info.pagetable1_i32;
-                            if (info.pagetable0->lb < info.pagetable0_i32)
-                                info.pagetable0->lb = info.pagetable0_i32;
-                            goto have_free_page; // break out of all of these loops
-                        }
-                    }
-                    info.pagetable1->freemap0[info.pagetable1_i32] &= ~(uint32_t)(1u << info.pagetable1_i); // record that this was full
-                }
-            }
-            memory_map.freemap1[info.pagetable_i32] &= ~(uint32_t)(1u << info.pagetable_i); // record that this was full
-        }
+    jl_gc_pagemeta_t *meta = NULL;
+
+    jl_mutex_lock_nogc(&global_page_pool_clean.lock);
+    meta = pop_page_metadata_back(&global_page_pool_clean.page_metadata_back);
+    jl_mutex_unlock_nogc(&global_page_pool_clean.lock);
+    if (meta != NULL) {
+        gc_alloc_map_set(meta->data, 1);
+        goto exit;
     }
-    // no existing pages found, allocate a new one
-    {
-        jl_gc_pagemeta_t *meta = jl_gc_alloc_new_page();
-        info = page_metadata_ext(meta->data);
-        assert(meta == info.meta);
-        // new pages are now available starting at max of lb and pagetable_i32
-        if (memory_map.lb > info.pagetable_i32)
-            memory_map.lb = info.pagetable_i32;
-        if (info.pagetable1->lb > info.pagetable1_i32)
-            info.pagetable1->lb = info.pagetable1_i32;
-        if (info.pagetable0->lb > info.pagetable0_i32)
-            info.pagetable0->lb = info.pagetable0_i32;
+    jl_mutex_lock_nogc(&global_page_pool_madvised.lock);
+    meta = pop_page_metadata_back(&global_page_pool_madvised.page_metadata_back);
+    jl_mutex_unlock_nogc(&global_page_pool_madvised.lock);
+    if (meta != NULL) {
+        gc_alloc_map_set(meta->data, 1);
+        goto exit;
     }
-have_free_page:
-    // in-use pages are now ending at min of ub and pagetable_i32
-    if (memory_map.ub < info.pagetable_i32)
-        memory_map.ub = info.pagetable_i32;
-    if (info.pagetable1->ub < info.pagetable1_i32)
-        info.pagetable1->ub = info.pagetable1_i32;
-    if (info.pagetable0->ub < info.pagetable0_i32)
-        info.pagetable0->ub = info.pagetable0_i32;
-
-    // mark this entry as in-use and not free
-    info.pagetable0->freemap[info.pagetable0_i32] &= ~(uint32_t)(1u << info.pagetable0_i);
-    info.pagetable0->allocmap[info.pagetable0_i32] |= (uint32_t)(1u << info.pagetable0_i);
-    info.pagetable1->allocmap0[info.pagetable1_i32] |= (uint32_t)(1u << info.pagetable1_i);
-    memory_map.allocmap1[info.pagetable_i32] |= (uint32_t)(1u << info.pagetable_i);
-
-#ifdef _OS_WINDOWS_
-    VirtualAlloc(info.meta->data, GC_PAGE_SZ, MEM_COMMIT, PAGE_READWRITE);
-#endif
+    char *data = jl_gc_try_alloc_pages();
+    if (data == NULL) {
+        jl_throw(jl_memory_exception);
+    }
+    meta = (jl_gc_pagemeta_t*)malloc_s(block_pg_cnt * sizeof(jl_gc_pagemeta_t));
+    for (int i = 0; i < block_pg_cnt; i++) {
+        jl_gc_pagemeta_t *pg = &meta[i];
+        pg->data = data + GC_PAGE_SZ * i;
+        jl_mutex_lock_nogc(&alloc_map.lock);
+        gc_alloc_map_maybe_create(pg->data);
+        jl_mutex_unlock_nogc(&alloc_map.lock);
+        if (i == 0) {
+            gc_alloc_map_set(pg->data, 1);
+        }
+        else {
+            jl_mutex_lock_nogc(&global_page_pool_clean.lock);
+            push_page_metadata_back(&global_page_pool_clean.page_metadata_back, pg);
+            jl_mutex_unlock_nogc(&global_page_pool_clean.lock);
+        }
+    }
+exit:
 #ifdef _OS_WINDOWS_
+    VirtualAlloc(meta->data, GC_PAGE_SZ, MEM_COMMIT, PAGE_READWRITE);
     SetLastError(last_error);
 #endif
     errno = last_errno;
-    current_pg_count++;
-    gc_final_count_page(current_pg_count);
-    uv_mutex_unlock(&gc_perm_lock);
-    return info.meta;
+    return meta;
 }
 
 // return a page to the freemap allocator
-void jl_gc_free_page(void *p) JL_NOTSAFEPOINT
+void jl_gc_free_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
 {
-    // update the allocmap and freemap to indicate this contains a free entry
-    struct jl_gc_metadata_ext info = page_metadata_ext(p);
-    uint32_t msk;
-    msk = (uint32_t)(1u << info.pagetable0_i);
-    assert(!(info.pagetable0->freemap[info.pagetable0_i32] & msk));
-    assert(info.pagetable0->allocmap[info.pagetable0_i32] & msk);
-    info.pagetable0->allocmap[info.pagetable0_i32] &= ~msk;
-    info.pagetable0->freemap[info.pagetable0_i32] |= msk;
-
-    msk = (uint32_t)(1u << info.pagetable1_i);
-    assert(info.pagetable1->allocmap0[info.pagetable1_i32] & msk);
-    if ((info.pagetable1->freemap0[info.pagetable1_i32] & msk) == 0)
-        info.pagetable1->freemap0[info.pagetable1_i32] |= msk;
-
-    msk = (uint32_t)(1u << info.pagetable_i);
-    assert(memory_map.allocmap1[info.pagetable_i32] & msk);
-    if ((memory_map.freemap1[info.pagetable_i32] & msk) == 0)
-        memory_map.freemap1[info.pagetable_i32] |= msk;
-
+    void *p = pg->data;
+    gc_alloc_map_set((char*)p, 0);
     // tell the OS we don't need these pages right now
     size_t decommit_size = GC_PAGE_SZ;
     if (GC_PAGE_SZ < jl_page_size) {
@@ -290,10 +124,9 @@ void jl_gc_free_page(void *p) JL_NOTSAFEPOINT
         void *otherp = (void*)((uintptr_t)p & ~(jl_page_size - 1)); // round down to the nearest physical page
         p = otherp;
         while (n_pages--) {
-            struct jl_gc_metadata_ext info = page_metadata_ext(otherp);
-            msk = (uint32_t)(1u << info.pagetable0_i);
-            if (info.pagetable0->allocmap[info.pagetable0_i32] & msk)
-                goto no_decommit;
+            if (gc_alloc_map_is_set((char*)otherp)) {
+                return;
+            }
             otherp = (void*)((char*)otherp + GC_PAGE_SZ);
         }
     }
@@ -313,20 +146,7 @@ void jl_gc_free_page(void *p) JL_NOTSAFEPOINT
 #else
     madvise(p, decommit_size, MADV_DONTNEED);
 #endif
-    /* TODO: Should we leave this poisoned and rather allow the GC to read poisoned pointers from
-     * the page when it sweeps pools?
-     */
     msan_unpoison(p, decommit_size);
-
-no_decommit:
-    // new pages are now available starting at max of lb and pagetable_i32
-    if (memory_map.lb > info.pagetable_i32)
-        memory_map.lb = info.pagetable_i32;
-    if (info.pagetable1->lb > info.pagetable1_i32)
-        info.pagetable1->lb = info.pagetable1_i32;
-    if (info.pagetable0->lb > info.pagetable0_i32)
-        info.pagetable0->lb = info.pagetable0_i32;
-    current_pg_count--;
 }
 
 #ifdef __cplusplus
diff --git a/src/gc.c b/src/gc.c
index 00b0102f72653b..45d7752550dde2 100644
--- a/src/gc.c
+++ b/src/gc.c
@@ -178,8 +178,6 @@ JL_DLLEXPORT uintptr_t jl_get_buff_tag(void)
     return jl_buff_tag;
 }
 
-pagetable_t memory_map;
-
 // List of marked big objects. Not per-thread. Accessed only by master thread.
 bigval_t *big_objects_marked = NULL;
 
@@ -819,7 +817,7 @@ FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_N
 STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o,
                                   uint8_t mark_mode) JL_NOTSAFEPOINT
 {
-    assert(!page_metadata(o));
+    assert(!gc_alloc_map_is_set((char*)o));
     bigval_t *hdr = bigval_header(o);
     if (mark_mode == GC_OLD_MARKED) {
         ptls->gc_cache.perm_scanned_bytes += hdr->sz & ~3;
@@ -842,13 +840,17 @@ STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o,
 // This function should be called exactly once during marking for each pool
 // object being marked to update the page metadata.
 STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o,
-                                    uint8_t mark_mode,
-                                    jl_gc_pagemeta_t *page) JL_NOTSAFEPOINT
+                                    uint8_t mark_mode) JL_NOTSAFEPOINT
 {
 #ifdef MEMDEBUG
     gc_setmark_big(ptls, o, mark_mode);
 #else
-    jl_assume(page);
+    jl_gc_pagemeta_t *page = NULL;
+    char *_o = (char*)o;
+    if (!gc_alloc_map_is_set(_o)) {
+        return;
+    }
+    page = jl_assume(page_metadata(_o));
     if (mark_mode == GC_OLD_MARKED) {
         ptls->gc_cache.perm_scanned_bytes += page->osize;
         static_assert(sizeof(_Atomic(uint16_t)) == sizeof(page->nold), "");
@@ -869,7 +871,7 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o,
 STATIC_INLINE void gc_setmark_pool(jl_ptls_t ptls, jl_taggedvalue_t *o,
                                    uint8_t mark_mode) JL_NOTSAFEPOINT
 {
-    gc_setmark_pool_(ptls, o, mark_mode, page_metadata(o));
+    gc_setmark_pool_(ptls, o, mark_mode);
 }
 
 STATIC_INLINE void gc_setmark(jl_ptls_t ptls, jl_taggedvalue_t *o,
@@ -893,11 +895,8 @@ STATIC_INLINE void gc_setmark_buf_(jl_ptls_t ptls, void *o, uint8_t mark_mode, s
     // sure.
     if (__likely(gc_try_setmark_tag(buf, mark_mode)) && !gc_verifying) {
         if (minsz <= GC_MAX_SZCLASS) {
-            jl_gc_pagemeta_t *page = page_metadata(buf);
-            if (page != NULL) {
-                gc_setmark_pool_(ptls, buf, bits, page);
-                return;
-            }
+            gc_setmark_pool_(ptls, buf, bits);
+            return;
         }
         gc_setmark_big(ptls, buf, bits);
     }
@@ -1243,6 +1242,10 @@ STATIC_INLINE jl_taggedvalue_t *gc_reset_page(jl_ptls_t ptls2, const jl_gc_pool_
     return beg;
 }
 
+jl_gc_global_page_pool_t global_page_pool_clean;
+jl_gc_global_page_pool_t global_page_pool_madvised;
+jl_gc_alloc_map_t alloc_map;
+
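// Aside (illustration only, not part of the patch): with the pagetable gone,
// a pool page cycles through the two global pools declared above.
// jl_gc_alloc_page prefers a clean page, then a madvised one, and only then
// maps a fresh block. A minimal sketch of the lock-pop-unlock step it repeats
// for each pool (`try_pop_pool` is a hypothetical helper; the list helpers
// and types come from gc.h in this patch):
static jl_gc_pagemeta_t *try_pop_pool(jl_gc_global_page_pool_t *pool)
{
    jl_mutex_lock_nogc(&pool->lock);
    jl_gc_pagemeta_t *pg = pop_page_metadata_back(&pool->page_metadata_back);
    jl_mutex_unlock_nogc(&pool->lock);
    return pg; // NULL when the pool is empty
}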
 // Add a new page to the pool. Discards any pages in `p->newpages` before.
 static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
 {
@@ -1252,6 +1255,8 @@ static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
     jl_gc_pagemeta_t *pg = jl_gc_alloc_page();
     pg->osize = p->osize;
     pg->thread_n = ptls->tid;
+    set_page_metadata(pg);
+    push_page_metadata_back(&ptls->page_metadata_back, pg);
     jl_taggedvalue_t *fl = gc_reset_page(ptls, p, pg, NULL);
     p->newpages = fl;
     return fl;
@@ -1348,7 +1353,7 @@ int jl_gc_classify_pools(size_t sz, int *osize)
 int64_t lazy_freed_pages = 0;
 
 // Returns pointer to terminal pointer of list rooted at *pfl.
-static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_taggedvalue_t **pfl, int sweep_full, int osize) JL_NOTSAFEPOINT
+static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **scratch, jl_gc_pagemeta_t *pg, jl_taggedvalue_t **pfl, int sweep_full, int osize) JL_NOTSAFEPOINT
 {
     char *data = pg->data;
     jl_taggedvalue_t *v = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET);
@@ -1356,6 +1361,7 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t
     size_t old_nfree = pg->nfree;
     size_t nfree;
 
+    int push_to_scratch = 1;
     int freedall = 1;
     int pg_skpd = 1;
     if (!pg->has_marked) {
@@ -1372,7 +1378,7 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t
             lazy_freed_pages++;
         }
         else {
-            jl_gc_free_page(data);
+            push_to_scratch = 0;
         }
         nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / osize;
         goto done;
@@ -1440,97 +1446,27 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t
     nfree = pg->nfree;
 
 done:
+    if (push_to_scratch) {
+        push_page_metadata_back(scratch, pg);
+    }
+    else {
+        jl_gc_free_page(pg);
+        push_page_metadata_back(&global_page_pool_madvised.page_metadata_back, pg);
+    }
     gc_time_count_page(freedall, pg_skpd);
     gc_num.freed += (nfree - old_nfree) * osize;
     return pfl;
 }
 
 // the actual sweeping over all allocated pages in a memory pool
-STATIC_INLINE void sweep_pool_page(jl_taggedvalue_t ***pfl, jl_gc_pagemeta_t *pg, int sweep_full) JL_NOTSAFEPOINT
+STATIC_INLINE void gc_sweep_pool_page(jl_taggedvalue_t ***pfl, jl_gc_pagemeta_t **scratch, jl_gc_pagemeta_t *pg, int sweep_full) JL_NOTSAFEPOINT
 {
     int p_n = pg->pool_n;
     int t_n = pg->thread_n;
     jl_ptls_t ptls2 = gc_all_tls_states[t_n];
     jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n];
     int osize = pg->osize;
-    pfl[t_n * JL_GC_N_POOLS + p_n] = sweep_page(p, pg, pfl[t_n * JL_GC_N_POOLS + p_n], sweep_full, osize);
-}
-
-// sweep over a pagetable0 for all allocated pages
-STATIC_INLINE int sweep_pool_pagetable0(jl_taggedvalue_t ***pfl, pagetable0_t *pagetable0, int sweep_full) JL_NOTSAFEPOINT
-{
-    unsigned ub = 0;
-    unsigned alloc = 0;
-    for (unsigned pg_i = 0; pg_i <= pagetable0->ub; pg_i++) {
-        uint32_t line = pagetable0->allocmap[pg_i];
-        unsigned j;
-        if (!line)
-            continue;
-        ub = pg_i;
-        alloc = 1;
-        for (j = 0; line; j++, line >>= 1) {
-            unsigned next = ffs_u32(line);
-            j += next;
-            line >>= next;
-            jl_gc_pagemeta_t *pg = pagetable0->meta[pg_i * 32 + j];
-            sweep_pool_page(pfl, pg, sweep_full);
-        }
-    }
-    pagetable0->ub = ub;
-    return alloc;
-}
-
-// sweep over pagetable1 for all pagetable0 that may contain allocated pages
-STATIC_INLINE int sweep_pool_pagetable1(jl_taggedvalue_t ***pfl, pagetable1_t *pagetable1, int sweep_full) JL_NOTSAFEPOINT
-{
-    unsigned ub = 0;
-    unsigned alloc = 0;
-    for (unsigned pg_i = 0; pg_i <= pagetable1->ub; pg_i++) {
-        uint32_t line = pagetable1->allocmap0[pg_i];
-        unsigned j;
-        for (j = 0; line; j++, line >>= 1) {
-            unsigned next = ffs_u32(line);
-            j += next;
-            line >>= next;
-            pagetable0_t *pagetable0 = pagetable1->meta0[pg_i * 32 + j];
-            if (pagetable0 && !sweep_pool_pagetable0(pfl, pagetable0, sweep_full))
-                pagetable1->allocmap0[pg_i] &= ~(1 << j); // no allocations found, remember that for next time
-        }
-        if (pagetable1->allocmap0[pg_i]) {
-            ub = pg_i;
-            alloc = 1;
-        }
-    }
-    pagetable1->ub = ub;
-    return alloc;
-}
-
-// sweep over all memory for all pagetable1 that may contain allocated pages
-static void sweep_pool_pagetable(jl_taggedvalue_t ***pfl, int sweep_full) JL_NOTSAFEPOINT
-{
-    if (REGION2_PG_COUNT == 1) { // compile-time optimization
-        pagetable1_t *pagetable1 = memory_map.meta1[0];
-        if (pagetable1 != NULL)
-            sweep_pool_pagetable1(pfl, pagetable1, sweep_full);
-        return;
-    }
-    unsigned ub = 0;
-    for (unsigned pg_i = 0; pg_i <= memory_map.ub; pg_i++) {
-        uint32_t line = memory_map.allocmap1[pg_i];
-        unsigned j;
-        for (j = 0; line; j++, line >>= 1) {
-            unsigned next = ffs_u32(line);
-            j += next;
-            line >>= next;
-            pagetable1_t *pagetable1 = memory_map.meta1[pg_i * 32 + j];
-            if (pagetable1 && !sweep_pool_pagetable1(pfl, pagetable1, sweep_full))
-                memory_map.allocmap1[pg_i] &= ~(1 << j); // no allocations found, remember that for next time
-        }
-        if (memory_map.allocmap1[pg_i]) {
-            ub = pg_i;
-        }
-    }
-    memory_map.ub = ub;
+    pfl[t_n * JL_GC_N_POOLS + p_n] = gc_sweep_page(p, scratch, pg, pfl[t_n * JL_GC_N_POOLS + p_n], sweep_full, osize);
 }
 
 // sweep over all memory that is being used and not in a pool
@@ -1601,11 +1537,16 @@ static void gc_sweep_pool(int sweep_full)
             }
             p->newpages = NULL;
         }
+        jl_gc_pagemeta_t *scratch = NULL;
+        jl_gc_pagemeta_t *pg = ptls2->page_metadata_back;
+        while (pg != NULL) {
+            jl_gc_pagemeta_t *pg2 = pg->next;
+            gc_sweep_pool_page(pfl, &scratch, pg, sweep_full);
+            pg = pg2;
+        }
+        ptls2->page_metadata_back = scratch;
     }
 
-    // the actual sweeping
-    sweep_pool_pagetable(pfl, sweep_full);
-
     // null out terminal pointers of free lists
     for (int t_i = 0; t_i < n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
@@ -3574,7 +3515,9 @@ void jl_init_thread_heap(jl_ptls_t ptls)
 // System-wide initializations
 void jl_gc_init(void)
 {
-
+    JL_MUTEX_INIT(&alloc_map.lock, "alloc_map_lock");
+    JL_MUTEX_INIT(&global_page_pool_clean.lock, "global_page_pool_clean_lock");
+    JL_MUTEX_INIT(&global_page_pool_madvised.lock, "global_page_pool_madvised_lock");
    JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock");
     JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock");
     uv_mutex_init(&gc_cache_lock);
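// Aside (illustration only, not part of the patch): gc_sweep_pool above
// rebuilds each thread's page list in place: gc_sweep_page either pushes a
// still-live page onto the caller's `scratch` list or frees it into
// global_page_pool_madvised. The pattern, reduced to its core (`page_is_live`
// is a hypothetical predicate standing in for the real sweep):
extern int page_is_live(jl_gc_pagemeta_t *pg); // hypothetical
static jl_gc_pagemeta_t *filter_pages(jl_gc_pagemeta_t *list)
{
    jl_gc_pagemeta_t *scratch = NULL;
    while (list != NULL) {
        jl_gc_pagemeta_t *next = list->next; // save: push overwrites ->next
        if (page_is_live(list))
            push_page_metadata_back(&scratch, list);
        list = next;
    }
    return scratch; // note: the LIFO rebuild reverses the list order
}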
@@ -4030,8 +3973,8 @@ JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void)
 JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
 {
     p = (char *) p - 1;
-    jl_gc_pagemeta_t *meta = page_metadata(p);
-    if (meta) {
+    if (gc_alloc_map_is_set((char*)p)) {
+        jl_gc_pagemeta_t *meta = page_metadata(p);
         char *page = gc_page_data(p);
         // offset within page.
         size_t off = (char *)p - page;
diff --git a/src/gc.h b/src/gc.h
index 47aab660c09817..9b3bef9cd0fcb5 100644
--- a/src/gc.h
+++ b/src/gc.h
@@ -34,7 +34,7 @@ extern "C" {
 
 #define GC_PAGE_LG2 14 // log2(size of a page)
 #define GC_PAGE_SZ (1 << GC_PAGE_LG2) // 16k
-#define GC_PAGE_OFFSET (JL_HEAP_ALIGNMENT - (sizeof(jl_taggedvalue_t) % JL_HEAP_ALIGNMENT))
+#define GC_PAGE_OFFSET (2 * JL_HEAP_ALIGNMENT - (sizeof(jl_taggedvalue_t) % JL_HEAP_ALIGNMENT))
 
 #define jl_malloc_tag ((void*)0xdeadaa01)
 #define jl_singleton_tag ((void*)0xdeadaa02)
@@ -143,7 +143,8 @@ typedef struct _mallocarray_t {
 } mallocarray_t;
 
 // pool page metadata
-typedef struct {
+typedef struct _jl_gc_pagemeta_t {
+    struct _jl_gc_pagemeta_t *next;
     // index of pool that owns this page
     uint8_t pool_n;
     // Whether any cell in the page is marked
@@ -177,74 +178,98 @@ typedef struct {
     char *data;
 } jl_gc_pagemeta_t;
 
+typedef struct {
+    jl_mutex_t lock;
+    jl_gc_pagemeta_t *page_metadata_back;
+} jl_gc_global_page_pool_t;
+
+extern jl_gc_global_page_pool_t global_page_pool_clean;
+extern jl_gc_global_page_pool_t global_page_pool_madvised;
+
+#define GC_ALLOC_MAP_LG2 ((sizeof(void*) * 8 - GC_PAGE_LG2) / 2)
+
+typedef struct {
+    uint8_t alloc[1ull << GC_ALLOC_MAP_LG2];
+} jl_gc_alloc_map_inner_t;
+
+typedef struct {
+    jl_mutex_t lock;
+    jl_gc_alloc_map_inner_t *alloc[1ull << GC_ALLOC_MAP_LG2];
+} jl_gc_alloc_map_t;
+
+extern jl_gc_alloc_map_t alloc_map;
+
+STATIC_INLINE size_t gc_alloc_map_inner_idx(void *_data) JL_NOTSAFEPOINT
+{
+    return (size_t)(((uintptr_t)(_data) << GC_ALLOC_MAP_LG2) >> (GC_ALLOC_MAP_LG2 + GC_PAGE_LG2));
+}
+
+STATIC_INLINE size_t gc_alloc_map_idx(void *_data) JL_NOTSAFEPOINT
+{
+    return (size_t)((uintptr_t)(_data) >> (GC_ALLOC_MAP_LG2 + GC_PAGE_LG2));
+}
+
+STATIC_INLINE uint8_t gc_alloc_map_is_set(char *_data) JL_NOTSAFEPOINT
+{
+    jl_gc_alloc_map_inner_t *inner = alloc_map.alloc[gc_alloc_map_idx(_data)];
+    if (inner != NULL) {
+        return inner->alloc[gc_alloc_map_inner_idx(_data)];
+    }
+    return 0;
+}
+
+STATIC_INLINE void gc_alloc_map_maybe_create(char *_data) JL_NOTSAFEPOINT
+{
+    jl_gc_alloc_map_inner_t *inner = alloc_map.alloc[gc_alloc_map_idx(_data)];
+    if (inner == NULL) {
+        inner = (jl_gc_alloc_map_inner_t*)calloc_s(sizeof(jl_gc_alloc_map_inner_t));
+        alloc_map.alloc[gc_alloc_map_idx(_data)] = inner;
+    }
+}
+
+STATIC_INLINE void gc_alloc_map_set(char *_data, uint8_t v) JL_NOTSAFEPOINT
+{
+    jl_gc_alloc_map_inner_t *inner = alloc_map.alloc[gc_alloc_map_idx(_data)];
+    assert(inner != NULL);
+    inner->alloc[gc_alloc_map_inner_idx(_data)] = v;
+}
 
 // Page layout:
 //  Newpage freelist: sizeof(void*)
-//  Padding: GC_PAGE_OFFSET - sizeof(void*)
+//  Metadata pointer: sizeof(jl_gc_pagemeta_t*)
+//  Padding: GC_PAGE_OFFSET - sizeof(void*) - sizeof(jl_gc_pagemeta_t*)
 //  Blocks: osize * n
 //    Tag: sizeof(jl_taggedvalue_t)
 //    Data: <= osize - sizeof(jl_taggedvalue_t)
 
-// Memory map:
-//  The complete address space is divided up into a multi-level page table.
-//  The three levels have similar but slightly different structures:
-//    - pagetable0_t: the bottom/leaf level (covers the contiguous addresses)
-//    - pagetable1_t: the middle level
-//    - pagetable2_t: the top/leaf level (covers the entire virtual address space)
-//  Corresponding to these similar structures is a large amount of repetitive
-//  code that is nearly the same but not identical. It could be made less
-//  repetitive with C macros, but only at the cost of debuggability. The specialized
-//  structure of this representation allows us to partially unroll and optimize
-//  various conditions at each level.
-
-//  The following constants define the branching factors at each level.
-//  The constants and GC_PAGE_LG2 must therefore sum to sizeof(void*).
-//  They should all be multiples of 32 (sizeof(uint32_t)) except that REGION2_PG_COUNT may also be 1.
-#ifdef _P64
-#define REGION0_PG_COUNT (1 << 16)
-#define REGION1_PG_COUNT (1 << 16)
-#define REGION2_PG_COUNT (1 << 18)
-#define REGION0_INDEX(p) (((uintptr_t)(p) >> 14) & 0xFFFF) // shift by GC_PAGE_LG2
-#define REGION1_INDEX(p) (((uintptr_t)(p) >> 30) & 0xFFFF)
-#define REGION_INDEX(p) (((uintptr_t)(p) >> 46) & 0x3FFFF)
-#else
-#define REGION0_PG_COUNT (1 << 8)
-#define REGION1_PG_COUNT (1 << 10)
-#define REGION2_PG_COUNT (1 << 0)
-#define REGION0_INDEX(p) (((uintptr_t)(p) >> 14) & 0xFF) // shift by GC_PAGE_LG2
-#define REGION1_INDEX(p) (((uintptr_t)(p) >> 22) & 0x3FF)
-#define REGION_INDEX(p) (0)
-#endif
+STATIC_INLINE char *gc_page_data(void *x) JL_NOTSAFEPOINT
+{
+    return (char*)(((uintptr_t)x >> GC_PAGE_LG2) << GC_PAGE_LG2);
+}
 
-// define the representation of the levels of the page-table (0 to 2)
-typedef struct {
-    jl_gc_pagemeta_t *meta[REGION0_PG_COUNT];
-    uint32_t allocmap[REGION0_PG_COUNT / 32];
-    uint32_t freemap[REGION0_PG_COUNT / 32];
-    // store a lower bound of the first free page in each region
-    int lb;
-    // an upper bound of the last non-free page
-    int ub;
-} pagetable0_t;
+STATIC_INLINE jl_gc_pagemeta_t *page_metadata(void *_data) JL_NOTSAFEPOINT
+{
+    return *(jl_gc_pagemeta_t**)(gc_page_data(_data) + sizeof(void*));
+}
 
-typedef struct {
-    pagetable0_t *meta0[REGION1_PG_COUNT];
-    uint32_t allocmap0[REGION1_PG_COUNT / 32];
-    uint32_t freemap0[REGION1_PG_COUNT / 32];
-    // store a lower bound of the first free page in each region
-    int lb;
-    // an upper bound of the last non-free page
-    int ub;
-} pagetable1_t;
+STATIC_INLINE void set_page_metadata(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
+{
+    *(jl_gc_pagemeta_t**)(pg->data + sizeof(void*)) = pg;
+}
 
-typedef struct {
-    pagetable1_t *meta1[REGION2_PG_COUNT];
-    uint32_t allocmap1[(REGION2_PG_COUNT + 31) / 32];
-    uint32_t freemap1[(REGION2_PG_COUNT + 31) / 32];
-    // store a lower bound of the first free page in each region
-    int lb;
-    // an upper bound of the last non-free page
-    int ub;
-} pagetable_t;
+STATIC_INLINE void push_page_metadata_back(jl_gc_pagemeta_t **ppg, jl_gc_pagemeta_t *elt) JL_NOTSAFEPOINT
+{
+    elt->next = *ppg;
+    *ppg = elt;
+}
+
+STATIC_INLINE jl_gc_pagemeta_t *pop_page_metadata_back(jl_gc_pagemeta_t **ppg) JL_NOTSAFEPOINT
+{
+    jl_gc_pagemeta_t *v = *ppg;
+    if (*ppg != NULL) {
+        *ppg = (*ppg)->next;
+    }
+    return v;
+}
 
 #ifdef __clang_gcanalyzer__ /* clang may not have __builtin_ffs */
 unsigned ffs_u32(uint32_t bitvec) JL_NOTSAFEPOINT;
@@ -256,7 +281,6 @@ STATIC_INLINE unsigned ffs_u32(uint32_t bitvec)
 #endif
 
 extern jl_gc_num_t gc_num;
-extern pagetable_t memory_map;
 extern bigval_t *big_objects_marked;
 extern arraylist_t finalizer_list_marked;
 extern arraylist_t to_finalize;
@@ -269,12 +293,6 @@ STATIC_INLINE bigval_t *bigval_header(jl_taggedvalue_t *o) JL_NOTSAFEPOINT
     return container_of(o, bigval_t, header);
 }
 
-// round an address inside a gcpage's data to its beginning
-STATIC_INLINE char *gc_page_data(void *x) JL_NOTSAFEPOINT
-{
-    return (char*)(((uintptr_t)x >> GC_PAGE_LG2) << GC_PAGE_LG2);
-}
-
 STATIC_INLINE jl_taggedvalue_t *page_pfl_beg(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT
 {
     return (jl_taggedvalue_t*)(p->data + p->fl_begin_offset);
@@ -312,52 +330,6 @@ STATIC_INLINE void *gc_ptr_clear_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT
 
 NOINLINE uintptr_t gc_get_stack_ptr(void);
 
-STATIC_INLINE jl_gc_pagemeta_t *page_metadata(void *_data) JL_NOTSAFEPOINT
-{
-    uintptr_t data = ((uintptr_t)_data);
-    unsigned i;
-    i = REGION_INDEX(data);
-    pagetable1_t *r1 = memory_map.meta1[i];
-    if (!r1)
-        return NULL;
-    i = REGION1_INDEX(data);
-    pagetable0_t *r0 = r1->meta0[i];
-    if (!r0)
-        return NULL;
-    i = REGION0_INDEX(data);
-    return r0->meta[i];
-}
-
-struct jl_gc_metadata_ext {
-    pagetable1_t *pagetable1;
-    pagetable0_t *pagetable0;
-    jl_gc_pagemeta_t *meta;
-    unsigned pagetable_i32, pagetable_i;
-    unsigned pagetable1_i32, pagetable1_i;
-    unsigned pagetable0_i32, pagetable0_i;
-};
-
-STATIC_INLINE struct jl_gc_metadata_ext page_metadata_ext(void *_data) JL_NOTSAFEPOINT
-{
-    uintptr_t data = (uintptr_t)_data;
-    struct jl_gc_metadata_ext info;
-    unsigned i;
-    i = REGION_INDEX(data);
-    info.pagetable_i = i % 32;
-    info.pagetable_i32 = i / 32;
-    info.pagetable1 = memory_map.meta1[i];
-    i = REGION1_INDEX(data);
-    info.pagetable1_i = i % 32;
-    info.pagetable1_i32 = i / 32;
-    info.pagetable0 = info.pagetable1->meta0[i];
-    i = REGION0_INDEX(data);
-    info.pagetable0_i = i % 32;
-    info.pagetable0_i32 = i / 32;
-    info.meta = info.pagetable0->meta[i];
-    assert(info.meta);
-    return info;
-}
-
 STATIC_INLINE void gc_big_object_unlink(const bigval_t *hdr) JL_NOTSAFEPOINT
 {
     *hdr->prev = hdr->next;
@@ -379,10 +351,8 @@ extern uv_mutex_t gc_threads_lock;
 extern uv_cond_t gc_threads_cond;
 extern _Atomic(int) gc_n_threads_marking;
 void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
-void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin,
-                      jl_value_t **fl_end) JL_NOTSAFEPOINT;
-void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list,
-                     size_t start) JL_NOTSAFEPOINT;
+void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT;
+void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start) JL_NOTSAFEPOINT;
 void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
 void gc_mark_loop_serial(jl_ptls_t ptls);
 void gc_mark_loop_parallel(jl_ptls_t ptls, int master);
@@ -391,9 +361,9 @@ void jl_gc_debug_init(void);
 
 // GC pages
 
-void jl_gc_init_page(void);
+void jl_gc_init_page(void) JL_NOTSAFEPOINT;
 NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT;
-void jl_gc_free_page(void *p) JL_NOTSAFEPOINT;
+void jl_gc_free_page(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT;
 
 // GC debug
 
diff --git a/src/julia_threads.h b/src/julia_threads.h
index c8242d6d6eb0ff..ab289196cd92b2 100644
--- a/src/julia_threads.h
+++ b/src/julia_threads.h
@@ -198,6 +198,7 @@ typedef struct {
 } jl_gc_mark_cache_t;
 
 struct _jl_bt_element_t;
+struct _jl_gc_pagemeta_t;
 
 // This includes all the thread local states we care about for a thread.
 // Changes to TLS field types must be reflected in codegen.
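// Aside (illustration only, not part of the patch): `page_metadata_back`,
// added to the TLS struct in the next hunk, heads an intrusive singly linked
// LIFO list threaded through the new `next` field of `_jl_gc_pagemeta_t`;
// push_page_metadata_back/pop_page_metadata_back from gc.h are its O(1)
// accessors. A minimal usage sketch (`pg_a`/`pg_b` are hypothetical pages):
static void page_list_demo(jl_gc_pagemeta_t *pg_a, jl_gc_pagemeta_t *pg_b)
{
    jl_gc_pagemeta_t *head = NULL;
    push_page_metadata_back(&head, pg_a);   // head -> pg_a
    push_page_metadata_back(&head, pg_b);   // head -> pg_b -> pg_a
    jl_gc_pagemeta_t *top = pop_page_metadata_back(&head);
    assert(top == pg_b && head == pg_a);    // LIFO order
    (void)top;
}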
@@ -260,6 +261,7 @@ typedef struct _jl_tls_states_t {
 #endif
     jl_thread_t system_id;
     arraylist_t finalizers;
+    struct _jl_gc_pagemeta_t *page_metadata_back;
     jl_gc_markqueue_t mark_queue;
     jl_gc_mark_cache_t gc_cache;
     arraylist_t sweep_objs;
diff --git a/src/support/dtypes.h b/src/support/dtypes.h
index a30fe85ccc0d0c..da570921c101c6 100644
--- a/src/support/dtypes.h
+++ b/src/support/dtypes.h
@@ -340,6 +340,23 @@ STATIC_INLINE void jl_store_unaligned_i16(void *ptr, uint16_t val) JL_NOTSAFEPOI
     memcpy(ptr, &val, 2);
 }
 
+STATIC_INLINE void *calloc_s(size_t sz) JL_NOTSAFEPOINT {
+    int last_errno = errno;
+#ifdef _OS_WINDOWS_
+    DWORD last_error = GetLastError();
+#endif
+    void *p = calloc(sz == 0 ? 1 : sz, 1);
+    if (p == NULL) {
+        perror("(julia) calloc");
+        abort();
+    }
+#ifdef _OS_WINDOWS_
+    SetLastError(last_error);
+#endif
+    errno = last_errno;
+    return p;
+}
+
 STATIC_INLINE void *malloc_s(size_t sz) JL_NOTSAFEPOINT {
     int last_errno = errno;
 #ifdef _OS_WINDOWS_
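// Aside (illustration only, not part of the patch): on a 64-bit build,
// GC_ALLOC_MAP_LG2 = (64 - 14) / 2 = 25, so the two-level alloc map splits the
// 50 non-offset address bits evenly: bits 39..63 select the outer slot and
// bits 14..38 select the byte inside a lazily calloc'ed 2^25-entry inner
// block. A self-contained sketch of the same index arithmetic with the
// constants spelled out (the address value is hypothetical):
#include <assert.h>
#include <stdint.h>

static void alloc_map_idx_demo(void)
{
    uintptr_t addr = (uintptr_t)0x00007f0012345678ull;  // hypothetical page address
    size_t outer = (size_t)(addr >> (25 + 14));         // same as gc_alloc_map_idx
    size_t inner = (size_t)((addr << 25) >> (25 + 14)); // same as gc_alloc_map_inner_idx
    assert(outer == (size_t)(addr >> 39));
    assert(inner == (size_t)((addr >> 14) & ((1ull << 25) - 1)));
    (void)outer;
    (void)inner;
}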