diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 9d66e62ba8b5e3..e729616936f03b 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -282,6 +282,7 @@ struct _gc_runtime_state { /* a list of callbacks to be invoked when collection is performed */ PyObject *callbacks; + Py_ssize_t heap_size; Py_ssize_t work_to_do; /* Which of the old spaces is the visited space */ int visited_space; @@ -321,7 +322,7 @@ extern void _PyGC_Unfreeze(PyInterpreterState *interp); /* Number of frozen objects */ extern Py_ssize_t _PyGC_GetFreezeCount(PyInterpreterState *interp); -extern PyObject *_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation); +extern PyObject *_PyGC_GetObjects(PyInterpreterState *interp, int generation); extern PyObject *_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs); // Functions to clear types free lists diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index ce01916bcabe4f..57acbac5859e7f 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1058,7 +1058,19 @@ class Z: callback.assert_not_called() gc.enable() + +class IncrementalGCTests(unittest.TestCase): + + def setUp(self): + # Reenable GC as it is disabled module-wide + gc.enable() + + def tearDown(self): + gc.disable() + @unittest.skipIf(Py_GIL_DISABLED, "Free threading does not support incremental GC") + # Use small increments to emulate longer running process in a shorter time + @gc_threshold(200, 10) def test_incremental_gc_handles_fast_cycle_creation(self): class LinkedList: @@ -1080,28 +1092,31 @@ def make_ll(depth): head = LinkedList(head, head.prev) return head - head = make_ll(10000) - count = 10000 + head = make_ll(1000) + count = 1000 - # We expect the counts to go negative eventually - # as there will some objects we aren't counting, - # e.g. the gc stats dicts. The test merely checks - # that the counts don't grow. + # There will be some objects we aren't counting, + # e.g. the gc stats dicts. This test checks + # that the counts don't grow, so we try to + # correct for the uncounted objects + # This is just an estimate. + CORRECTION = 20 enabled = gc.isenabled() gc.enable() olds = [] - for i in range(1000): - newhead = make_ll(200) - count += 200 + for i in range(20_000): + newhead = make_ll(20) + count += 20 newhead.surprise = head olds.append(newhead) - if len(olds) == 50: + if len(olds) == 20: stats = gc.get_stats() young = stats[0] incremental = stats[1] old = stats[2] collected = young['collected'] + incremental['collected'] + old['collected'] + count += CORRECTION live = count - collected self.assertLess(live, 25000) del olds[:] diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-03-21-12-10-11.gh-issue-117108._6jIrB.rst b/Misc/NEWS.d/next/Core and Builtins/2024-03-21-12-10-11.gh-issue-117108._6jIrB.rst new file mode 100644 index 00000000000000..57ad9606b05e05 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-03-21-12-10-11.gh-issue-117108._6jIrB.rst @@ -0,0 +1,3 @@ +The cycle GC now chooses the size of increments based on the total heap +size, instead of the rate of object creation. This ensures that it can keep +up with growing heaps. diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 3320e54dd9fe93..8a1b483eddae35 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -326,7 +326,7 @@ gc_get_objects_impl(PyObject *module, Py_ssize_t generation) } PyInterpreterState *interp = _PyInterpreterState_GET(); - return _PyGC_GetObjects(interp, generation); + return _PyGC_GetObjects(interp, (int)generation); } /*[clinic input] diff --git a/Python/gc.c b/Python/gc.c index d0f4ce38bbe567..2517b86a41fa53 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -182,6 +182,7 @@ _PyGC_Init(PyInterpreterState *interp) if (gcstate->callbacks == NULL) { return _PyStatus_NO_MEMORY(); } + gcstate->heap_size = 0; return _PyStatus_OK(); } @@ -1232,7 +1233,7 @@ gc_collect_region(PyThreadState *tstate, struct gc_collection_stats *stats); static inline Py_ssize_t -gc_list_set_space(PyGC_Head *list, uintptr_t space) +gc_list_set_space(PyGC_Head *list, int space) { Py_ssize_t size = 0; PyGC_Head *gc; @@ -1258,9 +1259,9 @@ gc_list_set_space(PyGC_Head *list, uintptr_t space) * N == 1.4 (1 + 4/threshold) */ -/* Multiply by 4 so that the default incremental threshold of 10 - * scans objects at 20% the rate of object creation */ -#define SCAN_RATE_MULTIPLIER 2 +/* Divide by 10, so that the default incremental threshold of 10 + * scans objects at 1% of the heap size */ +#define SCAN_RATE_DIVISOR 10 static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) @@ -1313,7 +1314,7 @@ gc_collect_young(PyThreadState *tstate, if (scale_factor < 1) { scale_factor = 1; } - gcstate->work_to_do += survivor_count + survivor_count * SCAN_RATE_MULTIPLIER / scale_factor; + gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; add_stats(gcstate, 0, stats); } @@ -1384,12 +1385,12 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat static void completed_cycle(GCState *gcstate) { +#ifdef Py_DEBUG PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; assert(gc_list_is_empty(not_visited)); +#endif gcstate->visited_space = flip_old_space(gcstate->visited_space); - if (gcstate->work_to_do > 0) { - gcstate->work_to_do = 0; - } + gcstate->work_to_do = 0; } static void @@ -1404,13 +1405,13 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) if (scale_factor < 1) { scale_factor = 1; } - Py_ssize_t increment_size = 0; gc_list_merge(&gcstate->young.head, &increment); gcstate->young.count = 0; if (gcstate->visited_space) { /* objects in visited space have bit set, so we set it here */ gc_list_set_space(&increment, 1); } + Py_ssize_t increment_size = 0; while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; @@ -1425,14 +1426,11 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) PyGC_Head survivors; gc_list_init(&survivors); gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats); - Py_ssize_t survivor_count = gc_list_size(&survivors); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - gcstate->work_to_do += survivor_count + survivor_count * SCAN_RATE_MULTIPLIER / scale_factor; + gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; - if (gcstate->work_to_do < 0) { - gcstate->work_to_do = 0; - } + validate_old(gcstate); add_stats(gcstate, 1, stats); if (gc_list_is_empty(not_visited)) { @@ -1678,7 +1676,7 @@ _PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs) } PyObject * -_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation) +_PyGC_GetObjects(PyInterpreterState *interp, int generation) { assert(generation >= -1 && generation < NUM_GENERATIONS); GCState *gcstate = &interp->gc; @@ -1974,6 +1972,7 @@ _PyObject_GC_Link(PyObject *op) gc->_gc_next = 0; gc->_gc_prev = 0; gcstate->young.count++; /* number of allocated GC objects */ + gcstate->heap_size++; if (gcstate->young.count > gcstate->young.threshold && gcstate->enabled && gcstate->young.threshold && @@ -2095,6 +2094,7 @@ PyObject_GC_Del(void *op) if (gcstate->young.count > 0) { gcstate->young.count--; } + gcstate->heap_size--; PyObject_Free(((char *)op)-presize); } diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 52c79c02099b53..69ce22a1e83b62 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1305,7 +1305,7 @@ visit_get_objects(const mi_heap_t *heap, const mi_heap_area_t *area, } PyObject * -_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation) +_PyGC_GetObjects(PyInterpreterState *interp, int generation) { PyObject *result = PyList_New(0); if (!result) {