From 688fa7444522d533544055dce20699debe5bbc23 Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Tue, 7 Nov 2023 19:01:58 -0500 Subject: [PATCH 1/3] Cache plugin modules --- core/dbt/plugins/manager.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/core/dbt/plugins/manager.py b/core/dbt/plugins/manager.py index cc71d5ab6bf..aa651a387ea 100644 --- a/core/dbt/plugins/manager.py +++ b/core/dbt/plugins/manager.py @@ -1,6 +1,8 @@ +import functools import importlib import pkgutil -from typing import Dict, List, Callable +from types import ModuleType +from typing import Dict, List, Callable, Mapping from dbt.contracts.graph.manifest import Manifest from dbt.exceptions import DbtRuntimeError @@ -63,6 +65,17 @@ def get_manifest_artifacts(self, manifest: Manifest) -> PluginArtifacts: raise NotImplementedError(f"get_manifest_artifacts hook not implemented for {self.name}") +@functools.cache +def _get_dbt_modules() -> Mapping[str, ModuleType]: + # This is an expensive function, especially in the context of testing, when + # it is called repeatedly, so we break it out and cache the result globally. + return { + name: importlib.import_module(name) + for _, name, _ in pkgutil.iter_modules() + if name.startswith(PluginManager.PLUGIN_MODULE_PREFIX) + } + + class PluginManager: PLUGIN_MODULE_PREFIX = "dbt_" PLUGIN_ATTR_NAME = "plugins" @@ -91,11 +104,7 @@ def __init__(self, plugins: List[dbtPlugin]) -> None: @classmethod def from_modules(cls, project_name: str) -> "PluginManager": - discovered_dbt_modules = { - name: importlib.import_module(name) - for _, name, _ in pkgutil.iter_modules() - if name.startswith(cls.PLUGIN_MODULE_PREFIX) - } + discovered_dbt_modules = _get_dbt_modules() plugins = [] for name, module in discovered_dbt_modules.items(): From 2e84dc0e18c5437b24f2842fcf9088c003224dc1 Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Tue, 7 Nov 2023 19:16:39 -0500 Subject: [PATCH 2/3] Add changelog entry --- .changes/unreleased/Under the Hood-20231107-191546.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Under the Hood-20231107-191546.yaml diff --git a/.changes/unreleased/Under the Hood-20231107-191546.yaml b/.changes/unreleased/Under the Hood-20231107-191546.yaml new file mode 100644 index 00000000000..d81c0448c63 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20231107-191546.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Cache dbt plugin modules to improve integration test performance +time: 2023-11-07T19:15:46.170151-05:00 +custom: + Author: peterallenwebb + Issue: "9029" From 0e6f43f945a1cb7d69a0f09aef160cf417039302 Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Mon, 20 Nov 2023 17:29:39 -0500 Subject: [PATCH 3/3] Use lru_cache to keep Python 3.8 happy. --- core/dbt/plugins/manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dbt/plugins/manager.py b/core/dbt/plugins/manager.py index aa651a387ea..ba6756d49fc 100644 --- a/core/dbt/plugins/manager.py +++ b/core/dbt/plugins/manager.py @@ -65,7 +65,7 @@ def get_manifest_artifacts(self, manifest: Manifest) -> PluginArtifacts: raise NotImplementedError(f"get_manifest_artifacts hook not implemented for {self.name}") -@functools.cache +@functools.lru_cache(maxsize=None) def _get_dbt_modules() -> Mapping[str, ModuleType]: # This is an expensive function, especially in the context of testing, when # it is called repeatedly, so we break it out and cache the result globally.