GH-126491: Lower heap size limit with faster marking (GH-127519)

* Faster marking of reachable objects

* Changes calculation of work to do and work done.

* Merges transitive closure calculations
markshannon authored Dec 6, 2024
1 parent 8b7c194 commit 023b7d2
Showing 6 changed files with 208 additions and 243 deletions.
50 changes: 44 additions & 6 deletions InternalDocs/garbage_collector.md
@@ -199,22 +199,22 @@ unreachable:

```pycon
>>> import gc
>>>
>>>
>>> class Link:
... def __init__(self, next_link=None):
... self.next_link = next_link
...
...
>>> link_3 = Link()
>>> link_2 = Link(link_3)
>>> link_1 = Link(link_2)
>>> link_3.next_link = link_1
>>> A = link_1
>>> del link_1, link_2, link_3
>>>
>>>
>>> link_4 = Link()
>>> link_4.next_link = link_4
>>> del link_4
>>>
>>>
>>> # Collect the unreachable Link object (and its .__dict__ dict).
>>> gc.collect()
2
@@ -459,11 +459,11 @@ specifically in a generation by calling `gc.collect(generation=NUM)`.
>>> # Create a reference cycle.
>>> x = MyObj()
>>> x.self = x
>>>
>>>
>>> # Initially the object is in the young generation.
>>> gc.get_objects(generation=0)
[..., <__main__.MyObj object at 0x7fbcc12a3400>, ...]
>>>
>>>
>>> # After a collection of the youngest generation the object
>>> # moves to the old generation.
>>> gc.collect(generation=0)
@@ -515,6 +515,44 @@ increment. All objects directly referred to from those stack frames are
added to the working set.
Then the above algorithm is repeated, starting from step 2.

Determining how much work to do
-------------------------------

We need to do a certain amount of work to ensure that garbage is collected,
but doing too much work slows down execution.

To work out how much work we need to do, consider a heap with `L` live objects
and `G0` garbage objects at the start of a full scavenge and `G1` garbage objects
at the end of the scavenge. We don't want the amount of garbage to grow, `G1 ≤ G0`, and
we don't want too much garbage, say at most 1/3 of the heap, which means `G0 ≤ L/2`.
For each full scavenge we must visit all objects, `T == L + G0 + G1`, during which
`G1` garbage objects are created.

The number of new objects created `N` must be at least the new garbage created, `N ≥ G1`,
assuming that the number of live objects remains roughly constant.
If we set `T == 4*N` we get `T ≥ 4*G1`, and since `T == L + G0 + G1` it follows that `L + G0 ≥ 3*G1`.
For a steady state heap (`G0 == G1`) we get `L ≥ 2*G0`, which is the desired garbage ratio.

In other words, to keep the garbage fraction to 1/3 or less we need to visit
4 times as many objects as are newly created.
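
As a quick sanity check, here is a toy arithmetic sketch of that bound (plain
Python, not part of the collector; the numbers are invented, and it assumes the
worst case where every new object becomes garbage):

```python
# Toy check of the bound above, assuming the worst case N == G0 == G1 == G.
N = G = 1_000        # new objects per full scavenge, all assumed to die
T = 4 * N            # work budget: visit 4x as many objects as were created
L = T - 2 * G        # live objects implied by the identity T == L + G0 + G1
assert L >= 2 * G    # garbage never exceeds half the live count...
print(G / (L + G))   # ...so the garbage fraction is at most 1/3 (prints 0.333...)
```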

We can do better than this, though: not all new objects will be garbage.
Consider the heap at the end of the scavenge with `L1` live objects and `G1`
garbage. Also note that `T == M + I`, where `M` is the number of objects marked
as reachable and `I` is the number of objects visited in increments.
Everything in `M` is live, so `I ≥ G0`, and in practice `I` is closer to `G0 + G1`.

If we choose the amount of work done such that `2*M + I == 6*N` then we can do
less work in most cases, but are still guaranteed to keep up.
Since `I ≳ G0 + G1` (not strictly true, but close enough) and `N ≥ G1`, we get
`T == M + I == (6*N + I)/2 ≳ 4*G` in the steady state where `G == G0 == G1`,
which matches the `T ≥ 4*G1` bound above, so we can keep up.

The reason that this improves performance is that `M` is usually much larger
than `I`. If `M == 10I`, then `T ≅ 3N`.
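
A small sketch of how this accounting plays out (again plain Python with
made-up numbers, not CPython's actual scheduler):

```python
# The rule 2*M + I == 6*N fixes how many new allocations N a scavenge pays for,
# and the total number of visits is T == M + I == (6*N + I) / 2.
def allocations_paid_for(marked, incremental):
    return (2 * marked + incremental) / 6   # solve 2*M + I == 6*N for N

I = 1_000                        # objects visited in increments
M = 10 * I                       # marking usually dominates
N = allocations_paid_for(M, I)   # 3500.0 allocations covered
T = M + I                        # 11000 total visits
print(T / N)                     # ~3.14, i.e. roughly 3*N visits instead of 4*N
```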

Finally, instead of using a fixed multiple of 8, we gradually increase it as the
heap grows. This avoids wasting work for small heaps and during startup.
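
A purely hypothetical sketch of such a ramp (the shape and constants below are
invented for illustration and are not CPython's actual tuning):

```python
# Grow the work multiplier linearly with heap size, capped at its full value,
# so small heaps and program startup are not charged the full marking cost.
def work_multiplier(heap_size, start=2.0, full=8.0, ramp=1_000_000):
    return min(full, start + (full - start) * heap_size / ramp)

for size in (10_000, 100_000, 1_000_000):
    print(size, work_multiplier(size))   # 2.06, 2.6, 8.0
```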


Optimization: reusing fields to save memory
===========================================
14 changes: 3 additions & 11 deletions Lib/test/test_gc.py
@@ -1161,27 +1161,19 @@ def make_ll(depth):
return head

head = make_ll(1000)
count = 1000

# There will be some objects we aren't counting,
# e.g. the gc stats dicts. This test checks
# that the counts don't grow, so we try to
# correct for the uncounted objects
# This is just an estimate.
CORRECTION = 20

enabled = gc.isenabled()
gc.enable()
olds = []
initial_heap_size = _testinternalcapi.get_tracked_heap_size()
for i in range(20_000):
iterations = max(20_000, initial_heap_size)
for i in range(iterations):
newhead = make_ll(20)
count += 20
newhead.surprise = head
olds.append(newhead)
if len(olds) == 20:
new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size
self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations")
self.assertLess(new_objects, initial_heap_size/2, f"Heap growing. Reached limit after {i} iterations")
del olds[:]
if not enabled:
gc.disable()
4 changes: 1 addition & 3 deletions Objects/dictobject.c
@@ -7064,9 +7064,7 @@ int
PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg)
{
PyTypeObject *tp = Py_TYPE(obj);
if((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0) {
return 0;
}
assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT);
if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) {
PyDictValues *values = _PyObject_InlineValues(obj);
if (values->valid) {
69 changes: 3 additions & 66 deletions Objects/genobject.c
@@ -882,25 +882,7 @@ PyTypeObject PyGen_Type = {
gen_methods, /* tp_methods */
gen_memberlist, /* tp_members */
gen_getsetlist, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */

0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
0, /* tp_free */
0, /* tp_is_gc */
0, /* tp_bases */
0, /* tp_mro */
0, /* tp_cache */
0, /* tp_subclasses */
0, /* tp_weaklist */
0, /* tp_del */
0, /* tp_version_tag */
_PyGen_Finalize, /* tp_finalize */
.tp_finalize = _PyGen_Finalize,
};

static PyObject *
@@ -1242,24 +1224,7 @@ PyTypeObject PyCoro_Type = {
coro_methods, /* tp_methods */
coro_memberlist, /* tp_members */
coro_getsetlist, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
0, /* tp_free */
0, /* tp_is_gc */
0, /* tp_bases */
0, /* tp_mro */
0, /* tp_cache */
0, /* tp_subclasses */
0, /* tp_weaklist */
0, /* tp_del */
0, /* tp_version_tag */
_PyGen_Finalize, /* tp_finalize */
.tp_finalize = _PyGen_Finalize,
};

static void
@@ -1464,7 +1429,6 @@ typedef struct _PyAsyncGenWrappedValue {
(assert(_PyAsyncGenWrappedValue_CheckExact(op)), \
_Py_CAST(_PyAsyncGenWrappedValue*, (op)))


static int
async_gen_traverse(PyObject *self, visitproc visit, void *arg)
{
@@ -1673,24 +1637,7 @@ PyTypeObject PyAsyncGen_Type = {
async_gen_methods, /* tp_methods */
async_gen_memberlist, /* tp_members */
async_gen_getsetlist, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
0, /* tp_free */
0, /* tp_is_gc */
0, /* tp_bases */
0, /* tp_mro */
0, /* tp_cache */
0, /* tp_subclasses */
0, /* tp_weaklist */
0, /* tp_del */
0, /* tp_version_tag */
_PyGen_Finalize, /* tp_finalize */
.tp_finalize = _PyGen_Finalize,
};


@@ -1935,16 +1882,6 @@ PyTypeObject _PyAsyncGenASend_Type = {
PyObject_SelfIter, /* tp_iter */
async_gen_asend_iternext, /* tp_iternext */
async_gen_asend_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
.tp_finalize = async_gen_asend_finalize,
};

13 changes: 13 additions & 0 deletions Objects/typeobject.c
@@ -2355,6 +2355,16 @@ subtype_traverse(PyObject *self, visitproc visit, void *arg)
return 0;
}


static int
plain_object_traverse(PyObject *self, visitproc visit, void *arg)
{
PyTypeObject *type = Py_TYPE(self);
assert(type->tp_flags & Py_TPFLAGS_MANAGED_DICT);
Py_VISIT(type);
return PyObject_VisitManagedDict(self, visit, arg);
}

static void
clear_slots(PyTypeObject *type, PyObject *self)
{
@@ -4147,6 +4157,9 @@ type_new_descriptors(const type_new_ctx *ctx, PyTypeObject *type)
assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0);
type->tp_flags |= Py_TPFLAGS_MANAGED_DICT;
type->tp_dictoffset = -1;
if (type->tp_basicsize == sizeof(PyObject)) {
type->tp_traverse = plain_object_traverse;
}
}

type->tp_basicsize = slotoffset;