diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 1fab3f577f2f89..567b5f6855a48a 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -394,8 +394,7 @@ Initializing and finalizing the interpreter Undo all initializations made by :c:func:`Py_Initialize` and subsequent use of Python/C API functions, and destroy all sub-interpreters (see :c:func:`Py_NewInterpreter` below) that were created and not yet destroyed since - the last call to :c:func:`Py_Initialize`. Ideally, this frees all memory - allocated by the Python interpreter. This is a no-op when called for a second + the last call to :c:func:`Py_Initialize`. This is a no-op when called for a second time (without calling :c:func:`Py_Initialize` again first). Since this is the reverse of :c:func:`Py_Initialize`, it should be called @@ -407,6 +406,12 @@ Initializing and finalizing the interpreter If there were errors during finalization (flushing buffered data), ``-1`` is returned. + Note that Python will do a best effort at freeing all memory allocated by the Python + interpreter. Therefore, any C-Extension should make sure to correctly clean up all + of the preveiously allocated PyObjects before using them in subsequent calls to + :c:func:`Py_Initialize`. Otherwise it could introduce vulnerabilities and incorrect + behavior. + This function is provided for a number of reasons. An embedding application might want to restart Python without having to restart the application itself. An application that has loaded the Python interpreter from a dynamically @@ -421,10 +426,11 @@ Initializing and finalizing the interpreter loaded extension modules loaded by Python are not unloaded. Small amounts of memory allocated by the Python interpreter may not be freed (if you find a leak, please report it). Memory tied up in circular references between objects is not - freed. Some memory allocated by extension modules may not be freed. Some - extensions may not work properly if their initialization routine is called more - than once; this can happen if an application calls :c:func:`Py_Initialize` and - :c:func:`Py_FinalizeEx` more than once. + freed. Interned strings will all be deallocated regarldess of their reference count. + Some memory allocated by extension modules may not be freed. Some extensions may not + work properly if their initialization routine is called more than once; this can + happen if an application calls :c:func:`Py_Initialize` and :c:func:`Py_FinalizeEx` + more than once. .. audit-event:: cpython._PySys_ClearAuditHooks "" c.Py_FinalizeEx diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index a7c817c47d66c6..c1a968994c06f2 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -417,6 +417,16 @@ New Features which has an ambiguous return value. (Contributed by Irit Katriel and Erlend Aasland in :gh:`105201`.) +* :c:func:`Py_Finalize` now deletes all interned strings. This + is backwards incompatible to any C-Extension that holds onto an interned + string after a call to :c:func:`Py_Finalize` and is then reused after a + call to :c:func:`Py_Initialize`. Any issues arising from this behavior will + normally result in crashes during the exectuion of the subsequent call to + :c:func:`Py_Initialize` from accessing uninitialized memory. To fix, use + an address sanitizer to identify any use-after-free coming from + an interned string and deallocate it during module shutdown. + (Contribued by Eddie Elizondo in :gh:`113601`.) + Porting to Python 3.14 ---------------------- diff --git a/Lib/test/_test_embed_structseq.py b/Lib/test/_test_embed_structseq.py index 834daa4df55fec..868f9f83e8be77 100644 --- a/Lib/test/_test_embed_structseq.py +++ b/Lib/test/_test_embed_structseq.py @@ -1,31 +1,27 @@ import sys import types +import unittest -# Note: This test file can't import `unittest` since the runtime can't -# currently guarantee that it will not leak memory. Doing so will mark -# the test as passing but with reference leaks. This can safely import -# the `unittest` library once there's a strict guarantee of no leaks -# during runtime shutdown. # bpo-46417: Test that structseq types used by the sys module are still # valid when Py_Finalize()/Py_Initialize() are called multiple times. -class TestStructSeq: +class TestStructSeq(unittest.TestCase): # test PyTypeObject members - def _check_structseq(self, obj_type): + def check_structseq(self, obj_type): # ob_refcnt - assert sys.getrefcount(obj_type) > 1 + self.assertGreaterEqual(sys.getrefcount(obj_type), 1) # tp_base - assert issubclass(obj_type, tuple) + self.assertTrue(issubclass(obj_type, tuple)) # tp_bases - assert obj_type.__bases__ == (tuple,) + self.assertEqual(obj_type.__bases__, (tuple,)) # tp_dict - assert isinstance(obj_type.__dict__, types.MappingProxyType) + self.assertIsInstance(obj_type.__dict__, types.MappingProxyType) # tp_mro - assert obj_type.__mro__ == (obj_type, tuple, object) + self.assertEqual(obj_type.__mro__, (obj_type, tuple, object)) # tp_name - assert isinstance(type.__name__, str) + self.assertIsInstance(type.__name__, str) # tp_subclasses - assert obj_type.__subclasses__() == [] + self.assertEqual(obj_type.__subclasses__(), []) def test_sys_attrs(self): for attr_name in ( @@ -36,23 +32,23 @@ def test_sys_attrs(self): 'thread_info', # ThreadInfoType 'version_info', # VersionInfoType ): - attr = getattr(sys, attr_name) - self._check_structseq(type(attr)) + with self.subTest(attr=attr_name): + attr = getattr(sys, attr_name) + self.check_structseq(type(attr)) def test_sys_funcs(self): func_names = ['get_asyncgen_hooks'] # AsyncGenHooksType if hasattr(sys, 'getwindowsversion'): func_names.append('getwindowsversion') # WindowsVersionType for func_name in func_names: - func = getattr(sys, func_name) - obj = func() - self._check_structseq(type(obj)) + with self.subTest(func=func_name): + func = getattr(sys, func_name) + obj = func() + self.check_structseq(type(obj)) try: - tests = TestStructSeq() - tests.test_sys_attrs() - tests.test_sys_funcs() + unittest.main() except SystemExit as exc: if exc.args[0] != 0: raise diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-15-18-11-48.gh-issue-113190.OwQX64.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-15-18-11-48.gh-issue-113190.OwQX64.rst new file mode 100644 index 00000000000000..4c12870c3df548 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-15-18-11-48.gh-issue-113190.OwQX64.rst @@ -0,0 +1 @@ +:c:func:`Py_Finalize` now deletes all interned strings. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 12578812a762f6..b96e6c0f4ee389 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15614,19 +15614,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) int shared = 0; switch (PyUnicode_CHECK_INTERNED(s)) { case SSTATE_INTERNED_IMMORTAL: - /* Make immortal interned strings mortal again. - * - * Currently, the runtime is not able to guarantee that it can exit - * without allocations that carry over to a future initialization - * of Python within the same process. i.e: - * ./python -X showrefcount -c 'import itertools' - * [237 refs, 237 blocks] - * - * This should remain disabled (`Py_DEBUG` only) until there is a - * strict guarantee that no memory will be left after - * `Py_Finalize`. - */ -#ifdef Py_DEBUG + /* Make immortal interned strings mortal again. */ // Skip the Immortal Instance check and restore // the two references (key and value) ignored // by PyUnicode_InternInPlace(). @@ -15639,7 +15627,6 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) #ifdef INTERNED_STATS total_length += PyUnicode_GET_LENGTH(s); #endif -#endif // Py_DEBUG break; case SSTATE_INTERNED_IMMORTAL_STATIC: /* It is shared between interpreters, so we should unmark it