From 30b6d5c59ea4d98d2d1eb46a0c534591a4ad939b Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 4 Jun 2024 09:42:13 -0400 Subject: [PATCH] gh-117657: Fix race involving GC and heap initialization (GH-119923) The `_PyThreadState_Bind()` function is called before the first `PyEval_AcquireThread()` so it's not synchronized with the stop the world GC. We had a race where `gc_visit_heaps()` might visit a thread's heap while it's being initialized. Use a simple atomic int to avoid visiting heaps for threads that are not yet fully initialized (i.e., before `tstate_mimalloc_bind()` is called). The race was reproducible by running: `python Lib/test/test_importlib/partial/pool_in_threads.py`. (cherry picked from commit e69d068ad0bd6a25434ea476a647b635da4d82bb) Co-authored-by: Sam Gross --- Include/internal/pycore_mimalloc.h | 1 + Python/gc_free_threading.c | 4 ++++ Python/pystate.c | 2 ++ Tools/tsan/suppressions_free_threading.txt | 3 --- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_mimalloc.h b/Include/internal/pycore_mimalloc.h index 100f78d53021ee..d10b01d5b49b19 100644 --- a/Include/internal/pycore_mimalloc.h +++ b/Include/internal/pycore_mimalloc.h @@ -52,6 +52,7 @@ struct _mimalloc_thread_state { mi_heap_t *current_object_heap; mi_heap_t heaps[_Py_MIMALLOC_HEAP_COUNT]; mi_tld_t tld; + int initialized; struct llist_node page_list; }; #endif diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index d005b79ff40dbf..f19362c9573812 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -252,6 +252,10 @@ gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor // visit each thread's heaps for GC objects for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { struct _mimalloc_thread_state *m = &((_PyThreadStateImpl *)p)->mimalloc; + if (!_Py_atomic_load_int(&m->initialized)) { + // The thread may not have called tstate_mimalloc_bind() yet. + continue; + } arg->offset = offset_base; if (!mi_heap_visit_blocks(&m->heaps[_Py_MIMALLOC_HEAP_GC], true, diff --git a/Python/pystate.c b/Python/pystate.c index d0293915db7689..e1a95907b57d20 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -3074,6 +3074,8 @@ tstate_mimalloc_bind(PyThreadState *tstate) // _PyObject_GC_New() and similar functions temporarily override this to // use one of the GC heaps. mts->current_object_heap = &mts->heaps[_Py_MIMALLOC_HEAP_OBJECT]; + + _Py_atomic_store_int(&mts->initialized, 1); #endif } diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index d5fcac61f0db04..8b64d1ff321858 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -25,7 +25,6 @@ race:free_threadstate race_top:_add_to_weak_set race_top:_in_weak_set -race_top:_mi_heap_delayed_free_partial race_top:_PyEval_EvalFrameDefault race_top:_PyImport_AcquireLock race_top:_PyImport_ReleaseLock @@ -33,7 +32,6 @@ race_top:_PyType_HasFeature race_top:assign_version_tag race_top:insertdict race_top:lookup_tp_dict -race_top:mi_heap_visit_pages race_top:PyMember_GetOne race_top:PyMember_SetOne race_top:new_reference @@ -58,7 +56,6 @@ race_top:_Py_slot_tp_getattr_hook race_top:add_threadstate race_top:dump_traceback race_top:fatal_error -race_top:mi_page_decode_padding race_top:_multiprocessing_SemLock_release_impl race_top:_PyFrame_GetCode race_top:_PyFrame_Initialize