Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[internal] Simplify import resolvers #1746

Merged
merged 6 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions src/databricks/labs/ucx/contexts/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
from databricks.labs.ucx.source_code.files import FileLoader, LocalFileResolver
from databricks.labs.ucx.source_code.path_lookup import PathLookup
from databricks.labs.ucx.source_code.graph import DependencyResolver
from databricks.labs.ucx.source_code.whitelist import WhitelistResolver, Whitelist
from databricks.labs.ucx.source_code.whitelist import Whitelist
from databricks.labs.ucx.source_code.languages import Languages
from databricks.labs.ucx.source_code.redash import Redash
from databricks.labs.ucx.workspace_access import generic, redash
Expand Down Expand Up @@ -381,19 +381,14 @@ def whitelist(self):
# TODO: fill in the whitelist
return Whitelist()

@cached_property
def whitelist_resolver(self):
return WhitelistResolver(self.whitelist)

@cached_property
def file_resolver(self):
return LocalFileResolver(self.file_loader)
return LocalFileResolver(self.file_loader, self.whitelist)

@cached_property
def dependency_resolver(self):
import_resolvers = [self.file_resolver, self.whitelist_resolver]
library_resolvers = [self.pip_resolver]
return DependencyResolver(library_resolvers, self.notebook_resolver, import_resolvers, self.path_lookup)
return DependencyResolver(library_resolvers, self.notebook_resolver, self.file_resolver, self.path_lookup)

@cached_property
def workflow_linter(self):
Expand Down
24 changes: 18 additions & 6 deletions src/databricks/labs/ucx/source_code/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pathlib import Path

from databricks.labs.ucx.source_code.path_lookup import PathLookup
from databricks.labs.ucx.source_code.whitelist import Whitelist, WhitelistResolver
from databricks.sdk.service.workspace import Language

from databricks.labs.ucx.source_code.languages import Languages
Expand Down Expand Up @@ -111,13 +112,11 @@ def __repr__(self):

class LocalFileResolver(BaseImportResolver, BaseFileResolver):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
class LocalFileResolver(BaseImportResolver, BaseFileResolver):
class ImportResolver(BaseFileResolver):

and get rid of BaseImportResolver.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

renamed to ImportFileResolver


def __init__(self, file_loader: FileLoader, next_resolver: BaseImportResolver | None = None):
super().__init__(next_resolver)
def __init__(self, file_loader: FileLoader, whitelist: Whitelist):
super().__init__()
self._whitelist_resolver = WhitelistResolver(whitelist)
self._file_loader = file_loader

def with_next_resolver(self, resolver: BaseImportResolver) -> BaseImportResolver:
return LocalFileResolver(self._file_loader, resolver)

def resolve_local_file(self, path_lookup, path: Path) -> MaybeDependency:
absolute_path = path_lookup.resolve(path)
if absolute_path:
Expand All @@ -126,6 +125,15 @@ def resolve_local_file(self, path_lookup, path: Path) -> MaybeDependency:
return MaybeDependency(None, [problem])

def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency:
maybe = self._whitelist_resolver.resolve_import(name)
if maybe is not None:
return maybe
maybe = self._resolve_import(path_lookup, name)
if maybe is not None:
return maybe
return self._fail('import-not-found', f"Could not locate import: {name}")

def _resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency | None:
if not name:
return MaybeDependency(None, [DependencyProblem("ucx-bug", "Import name is empty")])
parts = []
Expand All @@ -148,7 +156,11 @@ def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency:
continue
dependency = Dependency(self._file_loader, absolute_path)
return MaybeDependency(dependency, [])
return super().resolve_import(path_lookup, name)
return None

@staticmethod
def _fail(code: str, message: str):
return MaybeDependency(None, [DependencyProblem(code, message)])

def __repr__(self):
return "LocalFileResolver()"
49 changes: 5 additions & 44 deletions src/databricks/labs/ucx/source_code/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,21 +277,9 @@ def _fail(code: str, message: str) -> MaybeDependency:

class BaseImportResolver(abc.ABC):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

redundant: we no longer need BaseImportResolver

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's necessary to avoid circular imports (same as BaseNotebookResolver)


def __init__(self, next_resolver: BaseImportResolver | None):
self._next_resolver = next_resolver

@abc.abstractmethod
def with_next_resolver(self, resolver: BaseImportResolver) -> BaseImportResolver:
"""required to create a linked list of resolvers"""

@property
def next_resolver(self):
return self._next_resolver

def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency:
# TODO: remove StubResolver and return MaybeDependency(None, [...])
assert self._next_resolver is not None
return self._next_resolver.resolve_import(path_lookup, name)
"""resolve a simple or composite import name"""


class BaseFileResolver(abc.ABC):
Expand All @@ -317,22 +305,6 @@ def _fail(code: str, message: str):
return MaybeDependency(None, [DependencyProblem(code, message)])


class StubImportResolver(BaseImportResolver):

def __init__(self):
super().__init__(None)

def with_next_resolver(self, resolver: BaseImportResolver) -> BaseImportResolver:
raise NotImplementedError("Should never happen!")

def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency:
return self._fail('import-not-found', f"Could not locate import: {name}")

@staticmethod
def _fail(code: str, message: str):
return MaybeDependency(None, [DependencyProblem(code, message)])


@dataclass
class MaybeDependency:
dependency: Dependency | None
Expand All @@ -351,12 +323,12 @@ def __init__(
self,
library_resolvers: list[BaseLibraryResolver],
notebook_resolver: BaseNotebookResolver,
import_resolvers: list[BaseImportResolver],
import_resolver: BaseImportResolver,
path_lookup: PathLookup,
):
self._library_resolver = self._chain_library_resolvers(library_resolvers)
self._notebook_resolver = notebook_resolver
self._import_resolver = self._chain_import_resolvers(import_resolvers)
self._import_resolver = import_resolver
self._path_lookup = path_lookup

@staticmethod
Expand All @@ -367,14 +339,6 @@ def _chain_library_resolvers(library_resolvers: list[BaseLibraryResolver]) -> Ba
previous = resolver
return previous

@staticmethod
def _chain_import_resolvers(import_resolvers: list[BaseImportResolver]) -> BaseImportResolver:
previous: BaseImportResolver = StubImportResolver()
for resolver in import_resolvers:
resolver = resolver.with_next_resolver(previous)
previous = resolver
return previous

def resolve_notebook(self, path_lookup: PathLookup, path: Path) -> MaybeDependency:
return self._notebook_resolver.resolve_notebook(path_lookup, path)

Expand Down Expand Up @@ -406,11 +370,8 @@ def build_local_file_dependency_graph(self, path: Path) -> MaybeGraph:

@property
def _local_file_resolver(self) -> BaseFileResolver | None:
resolver = self._import_resolver
while resolver is not None:
if isinstance(resolver, BaseFileResolver):
return resolver
resolver = resolver.next_resolver
if isinstance(self._import_resolver, BaseFileResolver):
return self._import_resolver
return None

def build_notebook_dependency_graph(self, path: Path) -> MaybeGraph:
Expand Down
15 changes: 5 additions & 10 deletions src/databricks/labs/ucx/source_code/whitelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,8 @@

from yaml import load_all as load_yaml, Loader

from databricks.labs.ucx.source_code.path_lookup import PathLookup

from databricks.labs.ucx.source_code.graph import (
BaseImportResolver,
DependencyGraph,
DependencyProblem,
MaybeDependency,
Expand All @@ -22,16 +20,13 @@
logger = logging.getLogger(__name__)


class WhitelistResolver(BaseImportResolver):
class WhitelistResolver:

def __init__(self, whitelist: Whitelist, next_resolver: BaseImportResolver | None = None):
super().__init__(next_resolver)
def __init__(self, whitelist: Whitelist):
super().__init__()
self._whitelist = whitelist

def with_next_resolver(self, resolver: BaseImportResolver) -> BaseImportResolver:
return WhitelistResolver(self._whitelist, resolver)

def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency:
def resolve_import(self, name: str) -> MaybeDependency | None:
# TODO attach compatibility to dependency, see https://github.com/databrickslabs/ucx/issues/1382
compatibility = self._whitelist.compatibility(name)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move the body of this method as private method to ImportResolver class and get rid of WhitelistResolver.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

if compatibility == UCCompatibility.FULL:
Expand All @@ -43,7 +38,7 @@ def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency:
if compatibility == UCCompatibility.PARTIAL:
problem = DependencyProblem("dependency-check", f"Package {name} is only partially supported by UC")
return MaybeDependency(None, [problem])
return super().resolve_import(path_lookup, name)
return None


class StubContainer(SourceContainer):
Expand Down
72 changes: 32 additions & 40 deletions tests/unit/source_code/test_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from databricks.labs.ucx.source_code.files import FileLoader, LocalFileResolver
from databricks.labs.ucx.source_code.path_lookup import PathLookup
from databricks.labs.ucx.source_code.python_libraries import PipResolver
from databricks.labs.ucx.source_code.whitelist import WhitelistResolver, Whitelist
from databricks.labs.ucx.source_code.whitelist import Whitelist
from tests.unit import (
locate_site_packages,
_samples_path,
Expand Down Expand Up @@ -46,30 +46,23 @@ def test_dependency_resolver_visits_local_notebook_dependencies(mock_path_lookup


def test_dependency_resolver_visits_workspace_file_dependencies(mock_path_lookup):
whi = Whitelist()
whitelist = Whitelist()
file_loader = FileLoader()
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
import_resolvers = [
WhitelistResolver(whi),
LocalFileResolver(file_loader),
]
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup)
import_resolver = LocalFileResolver(file_loader, whitelist)
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolver, mock_path_lookup)
maybe = dependency_resolver.build_local_file_dependency_graph(Path('./root8.py'))
assert not maybe.failed
assert maybe.graph.all_relative_names() == {'leaf1.py', 'leaf2.py', 'root8.py'}


def test_dependency_resolver_raises_problem_with_unfound_workspace_notebook_dependency(mock_path_lookup):
whi = Whitelist()
file_loader = FileLoader()
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
import_resolvers = [
WhitelistResolver(whi),
LocalFileResolver(file_loader),
]
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup)
import_resolver = LocalFileResolver(file_loader, Whitelist())
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolver, mock_path_lookup)
maybe = dependency_resolver.build_notebook_dependency_graph(Path("root1-no-leaf.run.py"))
assert list(maybe.problems) == [
DependencyProblem(
Expand Down Expand Up @@ -127,8 +120,8 @@ def test_dependency_resolver_raises_problem_with_invalid_run_cell(mock_path_look
def test_dependency_resolver_visits_recursive_file_dependencies(mock_path_lookup):
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
import_resolvers = [LocalFileResolver(FileLoader())]
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup)
import_resolver = LocalFileResolver(FileLoader(), Whitelist())
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolver, mock_path_lookup)
maybe = dependency_resolver.build_local_file_dependency_graph(Path("root6.py"))
assert not maybe.failed
assert maybe.graph.all_relative_names() == {"root6.py", "root5.py", "leaf4.py"}
Expand All @@ -137,8 +130,8 @@ def test_dependency_resolver_visits_recursive_file_dependencies(mock_path_lookup
def test_dependency_resolver_raises_problem_with_unresolved_import(mock_path_lookup):
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
import_resolvers = [LocalFileResolver(FileLoader())]
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup)
import_resolver = LocalFileResolver(FileLoader(), Whitelist())
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolver, mock_path_lookup)
maybe = dependency_resolver.build_local_file_dependency_graph(Path('root7.py'))
assert list(maybe.problems) == [
DependencyProblem('import-not-found', 'Could not locate import: some_library', Path("root7.py"), 1, 0, 1, 19)
Expand All @@ -148,8 +141,8 @@ def test_dependency_resolver_raises_problem_with_unresolved_import(mock_path_loo
def test_dependency_resolver_raises_problem_with_non_constant_notebook_argument(mock_path_lookup):
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
import_resolvers = [WhitelistResolver(Whitelist()), LocalFileResolver(FileLoader())]
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup)
import_resolver = LocalFileResolver(FileLoader(), Whitelist())
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolver, mock_path_lookup)
maybe = dependency_resolver.build_local_file_dependency_graph(Path("run_notebooks.py"))
assert list(maybe.problems) == [
DependencyProblem(
Expand All @@ -167,8 +160,8 @@ def test_dependency_resolver_raises_problem_with_non_constant_notebook_argument(
def test_dependency_resolver_visits_file_dependencies(mock_path_lookup):
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
import_resolvers = [LocalFileResolver(FileLoader())]
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup)
import_resolver = LocalFileResolver(FileLoader(), Whitelist())
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolver, mock_path_lookup)
maybe = dependency_resolver.build_local_file_dependency_graph(Path("root5.py"))
assert not maybe.failed
assert maybe.graph.all_relative_names() == {"root5.py", "leaf4.py"}
Expand All @@ -177,8 +170,8 @@ def test_dependency_resolver_visits_file_dependencies(mock_path_lookup):
def test_dependency_resolver_skips_builtin_dependencies(mock_path_lookup):
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
import_resolvers = [WhitelistResolver(Whitelist()), LocalFileResolver(FileLoader())]
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup)
import_resolver = LocalFileResolver(FileLoader(), Whitelist())
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolver, mock_path_lookup)
maybe = dependency_resolver.build_local_file_dependency_graph(Path("python_builtins.py"))
assert not maybe.failed
graph = maybe.graph
Expand All @@ -191,8 +184,8 @@ def test_dependency_resolver_skips_builtin_dependencies(mock_path_lookup):
def test_dependency_resolver_ignores_known_dependencies(mock_path_lookup):
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
import_resolvers = [WhitelistResolver(Whitelist()), LocalFileResolver(FileLoader())]
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup)
import_resolver = LocalFileResolver(FileLoader(), Whitelist())
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolver, mock_path_lookup)
maybe = dependency_resolver.build_local_file_dependency_graph(Path("python_builtins.py"))
assert maybe.graph
graph = maybe.graph
Expand All @@ -205,11 +198,8 @@ def test_dependency_resolver_visits_site_packages(empty_index, mock_notebook_res
lookup = PathLookup.from_pathlike_string(Path.cwd(), _samples_path(SourceContainer))
lookup.append_path(site_packages_path)
file_loader = FileLoader()
import_resolvers = [
WhitelistResolver(Whitelist()),
LocalFileResolver(file_loader),
]
dependency_resolver = DependencyResolver([], mock_notebook_resolver, import_resolvers, lookup)
import_resolver = LocalFileResolver(file_loader, Whitelist())
dependency_resolver = DependencyResolver([], mock_notebook_resolver, import_resolver, lookup)
maybe = dependency_resolver.build_local_file_dependency_graph(Path("import-site-package.py"))
assert not maybe.failed
graph = maybe.graph
Expand All @@ -229,20 +219,17 @@ def test_dependency_resolver_resolves_sub_site_package():
file_loader = FileLoader()
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
import_resolvers = [
LocalFileResolver(file_loader),
WhitelistResolver(whitelist),
]
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, lookup)
import_resolver = LocalFileResolver(file_loader, whitelist)
dependency_resolver = DependencyResolver([], notebook_resolver, import_resolver, lookup)
maybe = dependency_resolver.build_local_file_dependency_graph(Path("import-sub-site-package.py"))
assert maybe.graph
maybe = maybe.graph.locate_dependency(Path(site_packages_path, "databricks", "labs", "lsql", "core.py"))
assert maybe.graph


def test_dependency_resolver_raises_problem_with_unfound_root_file(mock_path_lookup, mock_notebook_resolver):
import_resolvers = [LocalFileResolver(FileLoader())]
dependency_resolver = DependencyResolver([], mock_notebook_resolver, import_resolvers, mock_path_lookup)
import_resolver = LocalFileResolver(FileLoader(), Whitelist())
dependency_resolver = DependencyResolver([], mock_notebook_resolver, import_resolver, mock_path_lookup)
maybe = dependency_resolver.build_local_file_dependency_graph(Path("non-existing.py"))
assert list(maybe.problems) == [
DependencyProblem('file-not-found', 'File not found: non-existing.py', Path("non-existing.py"))
Expand All @@ -266,8 +253,8 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> So
return None

file_loader = FailingFileLoader()
import_resolvers = [LocalFileResolver(file_loader)]
dependency_resolver = DependencyResolver([], mock_notebook_resolver, import_resolvers, mock_path_lookup)
import_resolver = LocalFileResolver(file_loader, Whitelist())
dependency_resolver = DependencyResolver([], mock_notebook_resolver, import_resolver, mock_path_lookup)
maybe = dependency_resolver.build_local_file_dependency_graph(Path("import-sub-site-package.py"))
assert list(maybe.problems) == [
DependencyProblem(
Expand Down Expand Up @@ -301,8 +288,13 @@ def test_dependency_resolver_raises_problem_with_missing_file_loader(mock_notebo

def test_dependency_resolver_resolves_already_installed_library_dependency(mock_notebook_resolver):
path_lookup = PathLookup.from_sys_path(Path.cwd())
file_loader = FileLoader()
whitelist = Whitelist()
dependency_resolver = DependencyResolver(
[PipResolver(FileLoader(), Whitelist())], mock_notebook_resolver, [], path_lookup
[PipResolver(file_loader, whitelist)],
mock_notebook_resolver,
LocalFileResolver(file_loader, whitelist),
path_lookup,
)
maybe = dependency_resolver.build_library_dependency_graph(Path("astroid"))
library_graph = maybe.graph
Expand Down
Loading
Loading