From 9a1e55b8c5723206116f7016921be3937ef2f4e5 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Thu, 28 Mar 2024 06:59:31 -0400 Subject: [PATCH] gh-117178: Recover lazy loading of self-referential modules (#117179) --- Lib/importlib/util.py | 11 +++++------ Lib/test/test_importlib/test_lazy.py | 18 ++++++++++++++++++ ...4-03-23-14-26-18.gh-issue-117178.vTisTG.rst | 2 ++ 3 files changed, 25 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-03-23-14-26-18.gh-issue-117178.vTisTG.rst diff --git a/Lib/importlib/util.py b/Lib/importlib/util.py index da9bd080a8dd5a..f1bb4b1fb41576 100644 --- a/Lib/importlib/util.py +++ b/Lib/importlib/util.py @@ -178,12 +178,11 @@ def __getattribute__(self, attr): # Only the first thread to get the lock should trigger the load # and reset the module's class. The rest can now getattr(). if object.__getattribute__(self, '__class__') is _LazyModule: - # The first thread comes here multiple times as it descends the - # call stack. The first time, it sets is_loading and triggers - # exec_module(), which will access module.__dict__, module.__name__, - # and/or module.__spec__, reentering this method. These accesses - # need to be allowed to proceed without triggering the load again. - if loader_state['is_loading'] and attr.startswith('__') and attr.endswith('__'): + # Reentrant calls from the same thread must be allowed to proceed without + # triggering the load again. + # exec_module() and self-referential imports are the primary ways this can + # happen, but in any case we must return something to avoid deadlock. + if loader_state['is_loading']: return object.__getattribute__(self, attr) loader_state['is_loading'] = True diff --git a/Lib/test/test_importlib/test_lazy.py b/Lib/test/test_importlib/test_lazy.py index 38ab21907b58d9..4d2cc4eb62b67c 100644 --- a/Lib/test/test_importlib/test_lazy.py +++ b/Lib/test/test_importlib/test_lazy.py @@ -178,6 +178,24 @@ def access_module(): # Or multiple load attempts self.assertEqual(loader.load_count, 1) + def test_lazy_self_referential_modules(self): + # Directory modules with submodules that reference the parent can attempt to access + # the parent module during a load. Verify that this common pattern works with lazy loading. + # json is a good example in the stdlib. + json_modules = [name for name in sys.modules if name.startswith('json')] + with test_util.uncache(*json_modules): + # Standard lazy loading, unwrapped + spec = util.find_spec('json') + loader = util.LazyLoader(spec.loader) + spec.loader = loader + module = util.module_from_spec(spec) + sys.modules['json'] = module + loader.exec_module(module) + + # Trigger load with attribute lookup, ensure expected behavior + test_load = module.loads('{}') + self.assertEqual(test_load, {}) + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Library/2024-03-23-14-26-18.gh-issue-117178.vTisTG.rst b/Misc/NEWS.d/next/Library/2024-03-23-14-26-18.gh-issue-117178.vTisTG.rst new file mode 100644 index 00000000000000..f9c53ebbfc3c96 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-03-23-14-26-18.gh-issue-117178.vTisTG.rst @@ -0,0 +1,2 @@ +Fix regression in lazy loading of self-referential modules, introduced in +gh-114781.