From d447b6980856df7e0050ecaba4fd6cf21747d4f2 Mon Sep 17 00:00:00 2001
From: Sam Gross
Date: Wed, 12 Feb 2020 16:23:54 -0500
Subject: [PATCH] mimalloc: changes to support separate heaps

These are changes to support separate heaps for Python objects, Python
objects with a GC header, and non-Python objects.
---
 Include/cpython/pystate.h            |   9 +++
 Include/mimalloc/mimalloc-internal.h |   9 ++-
 Include/mimalloc/mimalloc-types.h    |   8 +-
 Include/mimalloc/mimalloc.h          |  10 +++
 Objects/mimalloc/heap.c              |   6 ++
 Objects/mimalloc/init.c              | 106 +++++++++++----------------
 Objects/mimalloc/page.c              |   2 +
 Objects/mimalloc/segment.c           |  34 +++++++--
 Python/pystate.c                     |  12 +++
 9 files changed, 123 insertions(+), 73 deletions(-)

diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h
index 7d3733ed658..a07a305efea 100644
--- a/Include/cpython/pystate.h
+++ b/Include/cpython/pystate.h
@@ -107,6 +107,13 @@ typedef struct _stack_chunk {
     PyObject * data[1]; /* Variable sized */
 } _PyStackChunk;
 
+struct mi_heap_s;
+typedef struct mi_heap_s mi_heap_t;
+
+// must match MI_NUM_HEAPS in mimalloc.h
+#define Py_NUM_HEAPS 5
+
+// The PyThreadState typedef is in Include/pystate.h.
 struct _ts {
     /* See Python/ceval.c for comments explaining most fields */
 
@@ -119,6 +126,8 @@ struct _ts {
 
     uintptr_t eval_breaker;
 
+    mi_heap_t *heaps[Py_NUM_HEAPS];
+
     /* Has been initialized to a safe state.
 
        In order to be effective, this must be set to 0 during or right
diff --git a/Include/mimalloc/mimalloc-internal.h b/Include/mimalloc/mimalloc-internal.h
index a68e69662c7..96f5c0cb075 100644
--- a/Include/mimalloc/mimalloc-internal.h
+++ b/Include/mimalloc/mimalloc-internal.h
@@ -115,6 +115,8 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t*
 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
 bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
 void _mi_segment_thread_collect(mi_segments_tld_t* tld);
+mi_segment_t* _mi_segment_abandoned(void);
+mi_segment_t* _mi_segment_abandoned_visited(void);
 
 #if MI_HUGE_PAGE_ABANDON
 void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
@@ -438,9 +440,10 @@ static inline bool mi_heap_is_initialized(mi_heap_t* heap) {
 }
 
 static inline uintptr_t _mi_ptr_cookie(const void* p) {
-  extern mi_heap_t _mi_heap_main;
-  mi_assert_internal(_mi_heap_main.cookie != 0);
-  return ((uintptr_t)p ^ _mi_heap_main.cookie);
+  extern mi_heap_t _mi_main_heaps[];
+  mi_heap_t* _mi_heap_main = &_mi_main_heaps[mi_heap_tag_default];
+  mi_assert_internal(_mi_heap_main->cookie != 0);
+  return ((uintptr_t)p ^ _mi_heap_main->cookie);
 }
 
 /* -----------------------------------------------------------
diff --git a/Include/mimalloc/mimalloc-types.h b/Include/mimalloc/mimalloc-types.h
index 884c9f86cb3..755a1489bb8 100644
--- a/Include/mimalloc/mimalloc-types.h
+++ b/Include/mimalloc/mimalloc-types.h
@@ -67,7 +67,9 @@ terms of the MIT license. A copy of the license can be found in the file
 // Encoded free lists allow detection of corrupted free lists
 // and can detect buffer overflows, modify after free, and double `free`s.
 #if (MI_SECURE>=3 || MI_DEBUG>=1)
-#define MI_ENCODE_FREELIST 1
+// TODO(sgross): Don't encode free-list because it breaks the constraint that
+// freed blocks do not have the LSB of the first word set.
+//#define MI_ENCODE_FREELIST 1
 #endif
 
 
@@ -277,6 +279,7 @@ typedef struct mi_page_s {
   uint8_t is_reset : 1;        // `true` if the page memory was reset
   uint8_t is_committed : 1;    // `true` if the page virtual memory is committed
   uint8_t is_zero_init : 1;    // `true` if the page was zero initialized
+  uint8_t tag : 4;             // heap tag (mi_heap_tag_t)
 
   // layout like this to optimize access in `mi_malloc` and `mi_free`
   uint16_t capacity;           // number of blocks committed, must be the first field, see `segment.c:page_clear`
@@ -454,6 +457,8 @@ struct mi_heap_s {
   size_t page_retired_max;     // largest retired index into the `pages` array.
   mi_heap_t* next;             // list of heaps per thread
   bool no_reclaim;             // `true` if this heap should not reclaim abandoned pages
+  unsigned char tag;
+  bool visited;                // used by gcmodule.c
 };
 
 
@@ -601,6 +606,7 @@ struct mi_tld_s {
   bool recurse;                // true if deferred was called; used to prevent infinite recursion.
   mi_heap_t* heap_backing;     // backing heap of this thread (cannot be deleted)
   mi_heap_t* heaps;            // list of heaps in this thread (so we can abandon all when the thread terminates)
+  mi_heap_t* default_heaps[MI_NUM_HEAPS];
   mi_segments_tld_t segments;  // segment tld
   mi_os_tld_t os;              // os tld
   mi_stats_t stats;            // statistics
diff --git a/Include/mimalloc/mimalloc.h b/Include/mimalloc/mimalloc.h
index 9b72fbfda74..e1514911b0f 100644
--- a/Include/mimalloc/mimalloc.h
+++ b/Include/mimalloc/mimalloc.h
@@ -187,12 +187,22 @@ mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned_at(void* p, size_t new
 struct mi_heap_s;
 typedef struct mi_heap_s mi_heap_t;
 
+typedef enum mi_heap_tag_e {
+  mi_heap_tag_default,
+  mi_heap_tag_obj,
+  mi_heap_tag_gc,
+  mi_heap_tag_list_array,
+  mi_heap_tag_dict_keys,
+  MI_NUM_HEAPS
+} mi_heap_tag_t;
+
 mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new(void);
 mi_decl_export void mi_heap_delete(mi_heap_t* heap);
 mi_decl_export void mi_heap_destroy(mi_heap_t* heap);
 mi_decl_export mi_heap_t* mi_heap_set_default(mi_heap_t* heap);
 mi_decl_export mi_heap_t* mi_heap_get_default(void);
 mi_decl_export mi_heap_t* mi_heap_get_backing(void);
+mi_decl_export mi_heap_t* mi_heap_get_tag(mi_heap_tag_t tag);
 mi_decl_export void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept;
 
 mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
diff --git a/Objects/mimalloc/heap.c b/Objects/mimalloc/heap.c
index ac2d042bfd2..c8f41f13fa5 100644
--- a/Objects/mimalloc/heap.c
+++ b/Objects/mimalloc/heap.c
@@ -200,6 +200,12 @@ mi_heap_t* mi_heap_get_backing(void) {
   return bheap;
 }
 
+mi_heap_t* mi_heap_get_tag(mi_heap_tag_t tag) {
+  mi_assert(tag >= 0 && tag < MI_NUM_HEAPS);
+  mi_heap_t* def = mi_heap_get_default();
+  return def->tld->default_heaps[tag];
+}
+
 mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena( mi_arena_id_t arena_id ) {
   mi_heap_t* bheap = mi_heap_get_backing();
   mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t);  // todo: OS allocate in secure mode?
diff --git a/Objects/mimalloc/init.c b/Objects/mimalloc/init.c
index c416208cfdd..30e6f688571 100644
--- a/Objects/mimalloc/init.c
+++ b/Objects/mimalloc/init.c
@@ -13,6 +13,7 @@ terms of the MIT license. A copy of the license can be found in the file
 // Empty page used to initialize the small free pages array
 const mi_page_t _mi_page_empty = {
   0, false, false, false, false,
+  0,     // tag
   0,     // capacity
   0,     // reserved capacity
   { 0 }, // flags
@@ -92,6 +93,7 @@ const mi_page_t _mi_page_empty = {
     SQNULL( 192), SQNULL( 224), SQNULL( 256), SQNULL( 320), SQNULL( 384), SQNULL( 448), SQNULL( 512), SQNULL( 640), /* 32 */ \
     SQNULL( 768), SQNULL( 896), SQNULL( 1024) /* 35 */ }
+static mi_span_queue_t _sq_empty[] = MI_SEGMENT_SPAN_QUEUES_EMPTY;
 
 // --------------------------------------------------------
 // Statically allocate an empty heap as the initial
 //
@@ -115,67 +117,60 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
   0,              // page count
   MI_BIN_FULL, 0, // page retired min/max
   NULL,           // next
-  false
-};
-
-#define tld_empty_stats ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats)))
-#define tld_empty_os ((mi_os_tld_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,os)))
-
-mi_decl_cache_align static const mi_tld_t tld_empty = {
-  0, false,
-  NULL, NULL,
-  { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, tld_empty_stats, tld_empty_os }, // segments
-  { 0, tld_empty_stats }, // os
-  { MI_STATS_NULL } // stats
+  0,
+  false
 };
 
 // the thread-local default heap for allocation
 mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
 
-extern mi_heap_t _mi_heap_main;
+#define _mi_heap_main (_mi_main_heaps[0])
 
-static mi_tld_t tld_main = {
-  0, false,
-  &_mi_heap_main, & _mi_heap_main,
-  { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments
-  { 0, &tld_main.stats }, // os
-  { MI_STATS_NULL } // stats
-};
+mi_heap_t _mi_main_heaps[MI_NUM_HEAPS];
 
-mi_heap_t _mi_heap_main = {
-  &tld_main,
-  MI_SMALL_PAGES_EMPTY,
-  MI_PAGE_QUEUES_EMPTY,
-  MI_ATOMIC_VAR_INIT(NULL),
-  0,              // thread id
-  0,              // initial cookie
-  0,              // arena id
-  { 0, 0 },       // the key of the main heap can be fixed (unlike page keys that need to be secure!)
-  { {0x846ca68b}, {0}, 0, true }, // random
-  0,              // page count
-  MI_BIN_FULL, 0, // page retired min/max
-  NULL,           // next heap
-  false           // can reclaim
-};
+static mi_tld_t tld_main;
 
 bool _mi_process_is_initialized = false;  // set to `true` in `mi_process_init`.
 mi_stats_t _mi_stats_main = { MI_STATS_NULL };
+
+static void _mi_heap_init_ex(mi_heap_t* heap, mi_tld_t* tld, int tag) {
+  if (heap->cookie != 0) return;
+  _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(*heap));
+  heap->thread_id = _mi_thread_id();
+  heap->cookie = 1;
+  #if defined(_WIN32) && !defined(MI_SHARED_LIB)
+  _mi_random_init_weak(&heap->random);  // prevent allocation failure during bcrypt dll initialization with static linking
+  #else
+  _mi_random_init(&heap->random);
+  #endif
+  heap->cookie = _mi_heap_random_next(heap) | 1;
+  heap->keys[0] = _mi_heap_random_next(heap);
+  heap->keys[1] = _mi_heap_random_next(heap);
+  heap->tld = tld;
+  heap->tag = tag;
+}
+
+static void _mi_thread_init_ex(mi_tld_t* tld, mi_heap_t heaps[])
+{
+  for (int tag = 0; tag < MI_NUM_HEAPS; tag++) {
+    _mi_heap_init_ex(&heaps[tag], tld, tag);
+    tld->default_heaps[tag] = &heaps[tag];
+  }
+  _mi_memcpy_aligned(&tld->segments.spans, &_sq_empty, sizeof(_sq_empty));
+  tld->heap_backing = &heaps[mi_heap_tag_default];
+  tld->heaps = heaps;
+  tld->segments.stats = &tld->stats;
+  tld->segments.os = &tld->os;
+  tld->os.stats = &tld->stats;
+}
+
 static void mi_heap_main_init(void) {
   if (_mi_heap_main.cookie == 0) {
-    _mi_heap_main.thread_id = _mi_thread_id();
-    _mi_heap_main.cookie = 1;
-    #if defined(_WIN32) && !defined(MI_SHARED_LIB)
-    _mi_random_init_weak(&_mi_heap_main.random);  // prevent allocation failure during bcrypt dll initialization with static linking
-    #else
-    _mi_random_init(&_mi_heap_main.random);
-    #endif
-    _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main);
-    _mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main);
-    _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main);
+    _mi_thread_init_ex(&tld_main, _mi_main_heaps);
   }
 }
 
@@ -191,7 +186,7 @@ mi_heap_t* _mi_heap_main_get(void) {
 
 // note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size).
 typedef struct mi_thread_data_s {
-  mi_heap_t heap;  // must come first due to cast in `_mi_heap_done`
+  mi_heap_t heaps[MI_NUM_HEAPS];  // must come first due to cast in `_mi_heap_done`
   mi_tld_t tld;
 } mi_thread_data_t;
 
@@ -212,6 +207,7 @@ static mi_thread_data_t* mi_thread_data_alloc(void) {
     if (td != NULL) {
       td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
       if (td != NULL) {
+        memset(td, 0, sizeof(*td));
         return td;
       }
     }
@@ -273,22 +269,8 @@ static bool _mi_heap_init(void) {
     if (td == NULL) return false;
 
     // OS allocated so already zero initialized
-    mi_tld_t* tld = &td->tld;
-    mi_heap_t* heap = &td->heap;
-    _mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld));
-    _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(*heap));
-    heap->thread_id = _mi_thread_id();
-    _mi_random_init(&heap->random);
-    heap->cookie = _mi_heap_random_next(heap) | 1;
-    heap->keys[0] = _mi_heap_random_next(heap);
-    heap->keys[1] = _mi_heap_random_next(heap);
-    heap->tld = tld;
-    tld->heap_backing = heap;
-    tld->heaps = heap;
-    tld->segments.stats = &tld->stats;
-    tld->segments.os = &tld->os;
-    tld->os.stats = &tld->stats;
-    _mi_heap_set_default_direct(heap);
+    _mi_thread_init_ex(&td->tld, td->heaps);
+    _mi_heap_set_default_direct(&td->heaps[0]);
   }
   return false;
 }
diff --git a/Objects/mimalloc/page.c b/Objects/mimalloc/page.c
index 4250ff358b4..4c408e517ff 100644
--- a/Objects/mimalloc/page.c
+++ b/Objects/mimalloc/page.c
@@ -252,6 +252,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
   mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
   #endif
   mi_assert_internal(!page->is_reset);
+  mi_assert_internal(page->tag == heap->tag);
   // TODO: push on full queue immediately if it is full?
   mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
   mi_page_queue_push(heap, pq, page);
@@ -672,6 +673,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   #else
   page->is_zero = page->is_zero_init;
   #endif
+  page->tag = heap->tag;
 
   mi_assert_internal(page->is_committed);
   mi_assert_internal(!page->is_reset);
diff --git a/Objects/mimalloc/segment.c b/Objects/mimalloc/segment.c
index a3047778865..f2fa9c3f0b4 100644
--- a/Objects/mimalloc/segment.c
+++ b/Objects/mimalloc/segment.c
@@ -1214,6 +1214,16 @@ static mi_segment_t* mi_abandoned_pop(void) {
    Abandon segment/page
 ----------------------------------------------------------- */
 
+extern mi_segment_t* _mi_segment_abandoned(void) {
+  mi_tagged_segment_t ts = mi_atomic_load_acquire(&abandoned);
+  mi_segment_t *segment = mi_tagged_segment_ptr(ts);
+  return segment;
+}
+
+extern mi_segment_t* _mi_segment_abandoned_visited(void) {
+  return mi_atomic_load_ptr_acquire(mi_segment_t, &abandoned_visited);
+}
+
 static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_internal(segment->used == segment->abandoned);
   mi_assert_internal(segment->used > 0);
@@ -1276,7 +1286,7 @@ static mi_slice_t* mi_slices_start_iterate(mi_segment_t* segment, const mi_slice
 }
 
 // Possibly free pages and check if free space is available
-static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, size_t block_size, mi_segments_tld_t* tld)
+static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, size_t block_size, int tag, mi_segments_tld_t* tld)
 {
   mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
   mi_assert_internal(mi_segment_is_abandoned(segment));
@@ -1304,7 +1314,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s
         }
       }
       else {
-        if (page->xblock_size == block_size && mi_page_has_any_available(page)) {
+        if (page->xblock_size == block_size && mi_page_has_any_available(page) && page->tag == tag) {
           // a page has available free blocks of the right size
           has_page = true;
         }
@@ -1321,6 +1331,14 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s
   return has_page;
 }
 
+static mi_heap_t* mi_heap_from_tag(mi_heap_t* base, unsigned int tag)
+{
+  if (tag == base->tag) {
+    return base;
+  }
+  return base->tld->default_heaps[tag];
+}
+
 // Reclaim an abandoned segment; returns NULL if the segment was freed
 // set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full.
 static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) {
@@ -1343,6 +1361,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
     if (mi_slice_is_used(slice)) {
       // in use: reclaim the page in our heap
       mi_page_t* page = mi_slice_to_page(slice);
+      mi_heap_t* target_heap = mi_heap_from_tag(heap, page->tag);
       mi_assert_internal(!page->is_reset);
       mi_assert_internal(page->is_committed);
       mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE);
@@ -1351,7 +1370,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
       _mi_stat_decrease(&tld->stats->pages_abandoned, 1);
       segment->abandoned--;
       // set the heap again and allow delayed free again
-      mi_page_set_heap(page, heap);
+      mi_page_set_heap(page, target_heap);
       _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
       _mi_page_free_collect(page, false); // ensure used count is up to date
       if (mi_page_all_free(page)) {
@@ -1360,8 +1379,9 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
       }
       else {
         // otherwise reclaim it into the heap
-        _mi_page_reclaim(heap, page);
-        if (requested_block_size == page->xblock_size && mi_page_has_any_available(page)) {
+        _mi_page_reclaim(target_heap, page);
+        if (heap == target_heap &&
+            requested_block_size == page->xblock_size && mi_page_has_any_available(page)) {
           if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; }
         }
       }
@@ -1403,7 +1423,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
     // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments
     // and push them into the visited list and use many tries. Perhaps we can skip non-suitable ones in a better way?
     bool is_suitable = _mi_heap_memid_is_suitable(heap, segment->memid);
-    bool has_page = mi_segment_check_free(segment,needed_slices,block_size,tld); // try to free up pages (due to concurrent frees)
+    bool has_page = mi_segment_check_free(segment,needed_slices,block_size,heap->tag,tld); // try to free up pages (due to concurrent frees)
     if (segment->used == 0) {
      // free the segment (by forced reclaim) to make it available to other threads.
      // note1: we prefer to free a segment as that might lead to reclaiming another
@@ -1440,7 +1460,7 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld)
     mi_abandoned_visited_revisit();
   }
   while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) {
-    mi_segment_check_free(segment,0,0,tld); // try to free up pages (due to concurrent frees)
+    mi_segment_check_free(segment,0,0,heap->tag,tld); // try to free up pages (due to concurrent frees)
     if (segment->used == 0) {
       // free the segment (by forced reclaim) to make it available to other threads.
       // note: we could in principle optimize this by skipping reclaim and directly
diff --git a/Python/pystate.c b/Python/pystate.c
index 2c7325e4da6..7f8427d692c 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -19,6 +19,7 @@
 #include "pycore_refcnt.h"
 #include "parking_lot.h"
+#include "mimalloc.h"
 
 /* --------------------------------------------------------------------------
 CAUTION
@@ -43,6 +44,10 @@ to avoid the expense of doing their own locking).
 extern "C" {
 #endif
 
+#if Py_NUM_HEAPS != MI_NUM_HEAPS
+#error "Py_NUM_HEAPS does not match MI_NUM_HEAPS"
+#endif
+
 #define _PyRuntimeGILState_GetThreadState(gilstate) _PyThreadState_GET()
 #define _PyRuntimeGILState_SetThreadState(gilstate, value) _PyThreadState_SET(value)
 
@@ -883,6 +888,9 @@ void _PyThreadState_SetCurrent(PyThreadState *tstate)
 {
     tstate->fast_thread_id = _Py_ThreadId();
+    for (int tag = 0; tag < Py_NUM_HEAPS; tag++) {
+        tstate->heaps[tag] = mi_heap_get_tag(tag);
+    }
     _PyParkingLot_InitThread();
     _Py_queue_create(tstate);
     _PyGILState_NoteThreadState(&tstate->interp->runtime->gilstate, tstate);
@@ -1096,6 +1104,10 @@ tstate_delete_common(PyThreadState *tstate,
     _Py_qsbr_unregister(tstate_impl->qsbr);
     tstate_impl->qsbr = NULL;
 
+    for (int tag = 0; tag < Py_NUM_HEAPS; tag++) {
+        tstate->heaps[tag] = NULL;
+    }
+
     _PyRuntimeState *runtime = interp->runtime;
     HEAD_LOCK(runtime);
     if (tstate->prev) {
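
A minimal usage sketch of the per-tag heap API introduced by this patch. Only mi_heap_get_tag() and the mi_heap_tag_* values come from the diff; mi_heap_malloc() and mi_free() are existing mimalloc entry points, and the standalone main(), variable names, and allocation sizes are illustrative assumptions rather than CPython code.

    #include <stdio.h>
    #include "mimalloc.h"

    int main(void) {
        // Memory for objects that carry a GC header would come from the "gc"-tagged heap...
        mi_heap_t *gc_heap = mi_heap_get_tag(mi_heap_tag_gc);
        void *op = mi_heap_malloc(gc_heap, 64);    // 64 bytes is an arbitrary example size

        // ...while plain, non-object memory keeps using the default heap.
        mi_heap_t *raw_heap = mi_heap_get_tag(mi_heap_tag_default);
        void *buf = mi_heap_malloc(raw_heap, 256);

        printf("gc object at %p, raw buffer at %p\n", op, buf);

        // mi_free() accepts blocks allocated from any mimalloc heap.
        mi_free(op);
        mi_free(buf);
        return 0;
    }

Within CPython the same heaps are reached through tstate->heaps[tag], which _PyThreadState_SetCurrent() fills from mi_heap_get_tag(). The page->tag field and mi_heap_from_tag() in segment.c keep abandoned pages attached to a heap with the matching tag when they are reclaimed, so blocks do not migrate between differently tagged heaps.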