Skip to content

Commit

Permalink
Fix "too many values to unpack" error in integration tests (#2807)
Browse files Browse the repository at this point in the history
## Changes
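Fix "too many values to unpack" error in integration tests introduced in
#2772. The tests unpacked the result of `workflow_linter.lint_job` into two
names, but it now yields three values; they now unpack it as `problems, *_`
(or `problems, dfsas, _` where the second value is needed).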

### Linked issues

Resolves #2806
Resolves #2805
Resolves #2804
Resolves #2803
Resolves #2802
Resolves #2801
Resolves #2799
Resolves #2798

### Tests

- [x] fixed integration tests
  • Loading branch information
JCZuurmond authored Oct 3, 2024
1 parent 6cc819a commit 9de04eb
Showing 1 changed file with 69 additions and 41 deletions.
110 changes: 69 additions & 41 deletions tests/integration/source_code/test_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,22 @@ def test_linter_from_context(simple_ctx, make_job, make_notebook):
assert result['count'] > 0


def test_job_linter_no_problems(simple_ctx, make_job):
def test_job_linter_no_problems(simple_ctx, make_job) -> None:
j = make_job()

problems, _dfsas = simple_ctx.workflow_linter.lint_job(j.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(j.job_id)

assert len(problems) == 0


def test_job_task_linter_library_not_installed_cluster(
simple_ctx, make_job, make_random, make_cluster, make_notebook, make_directory
):
simple_ctx,
make_job,
make_random,
make_cluster,
make_notebook,
make_directory,
) -> None:
created_cluster = make_cluster(single_node=True)
entrypoint = make_directory()

Expand All @@ -93,13 +98,20 @@ def test_job_task_linter_library_not_installed_cluster(
)
j = make_job(tasks=[task])

problems, _dfsas = simple_ctx.workflow_linter.lint_job(j.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(j.job_id)

assert len([problem for problem in problems if problem.message == "Could not locate import: greenlet"]) == 1


def test_job_task_linter_library_installed_cluster(
simple_ctx, ws, make_job, make_random, make_cluster, make_notebook, make_directory
):
simple_ctx,
ws,
make_job,
make_random,
make_cluster,
make_notebook,
make_directory,
) -> None:
created_cluster = make_cluster(single_node=True)
libraries_api = ws.libraries
libraries_api.install(created_cluster.cluster_id, [Library(pypi=PythonPyPiLibrary("greenlet"))])
Expand All @@ -117,7 +129,8 @@ def test_job_task_linter_library_installed_cluster(
)
j = make_job(tasks=[task])

problems, _dfsas = simple_ctx.workflow_linter.lint_job(j.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(j.job_id)

assert len([problem for problem in problems if problem.message == "Could not locate import: greenlet"]) == 0


Expand All @@ -129,7 +142,7 @@ def test_job_linter_some_notebook_graph_with_problems(
make_random,
caplog,
watchdog_purge_suffix,
):
) -> None:
expected_messages = {
'some_file.py:0 [direct-filesystem-access] The use of direct filesystem references is deprecated: /mnt/foo/bar',
'second_notebook:3 [direct-filesystem-access] The use of direct filesystem references is deprecated: /mnt/something',
Expand All @@ -153,7 +166,7 @@ def test_job_linter_some_notebook_graph_with_problems(
(entrypoint / 'some_file.py').write_text('display(spark.read.parquet("/mnt/foo/bar"))')

with caplog.at_level(logging.WARNING, logger="databricks.labs.ucx.source_code.jobs"):
problems, dfsas = simple_ctx.workflow_linter.lint_job(j.job_id)
problems, dfsas, _ = simple_ctx.workflow_linter.lint_job(j.job_id)

root = Path(entrypoint.as_posix())
messages = {replace(p, path=Path(p.path).relative_to(root)).as_message() for p in problems}
Expand Down Expand Up @@ -185,7 +198,7 @@ def test_workflow_linter_lints_job_with_import_pypi_library(
make_notebook,
make_random,
watchdog_purge_suffix,
):
) -> None:
entrypoint = WorkspacePath(ws, f"~/linter-{make_random(4)}-{watchdog_purge_suffix}").expanduser()
entrypoint.mkdir()

Expand All @@ -197,14 +210,15 @@ def test_workflow_linter_lints_job_with_import_pypi_library(
make_notebook(path=notebook, content=b"import greenlet")

job_without_pytest_library = make_job(notebook_path=notebook)
problems, _dfsas = simple_ctx.workflow_linter.lint_job(job_without_pytest_library.job_id)

problems, *_ = simple_ctx.workflow_linter.lint_job(job_without_pytest_library.job_id)

assert len([problem for problem in problems if problem.message == "Could not locate import: greenlet"]) > 0

library = compute.Library(pypi=compute.PythonPyPiLibrary(package="greenlet"))
job_with_pytest_library = make_job(notebook_path=notebook, libraries=[library])

problems, _dfsas = simple_ctx.workflow_linter.lint_job(job_with_pytest_library.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(job_with_pytest_library.job_id)

assert len([problem for problem in problems if problem.message == "Could not locate import: greenlet"]) == 0

Expand Down Expand Up @@ -289,7 +303,7 @@ def test_workflow_linter_lints_job_with_workspace_requirements_dependency(
make_job,
make_notebook,
make_directory,
):
) -> None:
# A requirement that can definitely not be found.
requirements = "a_package_that_does_not_exist\n"

Expand All @@ -304,7 +318,8 @@ def test_workflow_linter_lints_job_with_workspace_requirements_dependency(
notebook = make_notebook(path=f"{entrypoint}/notebook.ipynb", content=python_code.encode("utf-8"))
job_with_pytest_library = make_job(notebook_path=notebook, libraries=[library])

problems, _dfsas = simple_ctx.workflow_linter.lint_job(job_with_pytest_library.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(job_with_pytest_library.job_id)

messages = tuple(problem.message for problem in problems)
expected_messages = (
"ERROR: Could not find a version that satisfies the requirement a_package_that_does_not_exist",
Expand All @@ -322,7 +337,7 @@ def test_workflow_linter_lints_job_with_dbfs_requirements_dependency(
make_notebook,
make_directory,
make_dbfs_directory,
):
) -> None:
# A requirement that can definitely not be found.
requirements = "a_package_that_does_not_exist\n"

Expand All @@ -337,7 +352,8 @@ def test_workflow_linter_lints_job_with_dbfs_requirements_dependency(
notebook = make_notebook(path=f"{entrypoint}/notebook.ipynb", content=python_code.encode("utf-8"))
job_with_pytest_library = make_job(notebook_path=notebook, libraries=[library])

problems, _dfsas = simple_ctx.workflow_linter.lint_job(job_with_pytest_library.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(job_with_pytest_library.job_id)

messages = tuple(problem.message for problem in problems)
expected_messages = (
"ERROR: Could not find a version that satisfies the requirement a_package_that_does_not_exist",
Expand All @@ -354,7 +370,7 @@ def test_workflow_linter_lints_job_with_workspace_egg_dependency(
make_job,
make_notebook,
make_directory,
):
) -> None:
expected_problem_message = "Could not locate import: thingy"
egg_file = Path(__file__).parent / "../../unit/source_code/samples/distribution/dist/thingy-0.0.1-py3.10.egg"

Expand All @@ -367,7 +383,7 @@ def test_workflow_linter_lints_job_with_workspace_egg_dependency(
notebook = make_notebook(path=f"{entrypoint}/notebook.ipynb", content=b"import thingy\n")
job_with_egg_dependency = make_job(notebook_path=notebook, libraries=[library])

problems, _dfsas = simple_ctx.workflow_linter.lint_job(job_with_egg_dependency.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(job_with_egg_dependency.job_id)

assert not [problem for problem in problems if problem.message == expected_problem_message]

Expand All @@ -378,7 +394,7 @@ def test_workflow_linter_lints_job_with_dbfs_egg_dependency(
make_notebook,
make_directory,
make_dbfs_directory,
):
) -> None:
expected_problem_message = "Could not locate import: thingy"
egg_file = Path(__file__).parent / "../../unit/source_code/samples/distribution/dist/thingy-0.0.1-py3.10.egg"

Expand All @@ -391,12 +407,12 @@ def test_workflow_linter_lints_job_with_dbfs_egg_dependency(
notebook = make_notebook(path=f"{entrypoint}/notebook.ipynb", content=b"import thingy\n")
job_with_egg_dependency = make_job(notebook_path=notebook, libraries=[library])

problems, _dfsas = simple_ctx.workflow_linter.lint_job(job_with_egg_dependency.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(job_with_egg_dependency.job_id)

assert not [problem for problem in problems if problem.message == expected_problem_message]


def test_workflow_linter_lints_job_with_missing_library(simple_ctx, make_job, make_notebook, make_directory):
def test_workflow_linter_lints_job_with_missing_library(simple_ctx, make_job, make_notebook, make_directory) -> None:
expected_problem_message = "Could not locate import: databricks.labs.ucx"
allow_list = create_autospec(KnownList) # databricks is in default list
allow_list.module_compatibility.return_value = UNKNOWN
Expand All @@ -409,13 +425,13 @@ def test_workflow_linter_lints_job_with_missing_library(simple_ctx, make_job, ma
notebook = make_notebook(path=f"{make_directory()}/notebook.ipynb", content=b"import databricks.labs.ucx")
job_without_ucx_library = make_job(notebook_path=notebook)

problems, _dfsas = simple_ctx.workflow_linter.lint_job(job_without_ucx_library.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(job_without_ucx_library.job_id)

assert len([problem for problem in problems if problem.message == expected_problem_message]) > 0
allow_list.module_compatibility.assert_called_once_with("databricks.labs.ucx")


def test_workflow_linter_lints_job_with_wheel_dependency(simple_ctx, make_job, make_notebook, make_directory):
def test_workflow_linter_lints_job_with_wheel_dependency(simple_ctx, make_job, make_notebook, make_directory) -> None:
expected_problem_message = "Could not locate import: databricks.labs.ucx"

simple_ctx = simple_ctx.replace(
Expand All @@ -430,7 +446,7 @@ def test_workflow_linter_lints_job_with_wheel_dependency(simple_ctx, make_job, m
notebook = make_notebook(path=f"{make_directory()}/notebook.ipynb", content=b"import databricks.labs.ucx")
job_with_ucx_library = make_job(notebook_path=notebook, libraries=[library])

problems, _dfsas = simple_ctx.workflow_linter.lint_job(job_with_ucx_library.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(job_with_ucx_library.job_id)

assert len([problem for problem in problems if problem.message == expected_problem_message]) == 0

Expand All @@ -442,7 +458,7 @@ def test_job_spark_python_task_linter_happy_path(
make_cluster,
make_notebook,
make_directory,
):
) -> None:
entrypoint = make_directory()

make_notebook(path=f"{entrypoint}/notebook.py", content=b"import greenlet")
Expand All @@ -458,13 +474,19 @@ def test_job_spark_python_task_linter_happy_path(
)
j = make_job(tasks=[task])

problems, _dfsas = simple_ctx.workflow_linter.lint_job(j.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(j.job_id)

assert len([problem for problem in problems if problem.message == "Could not locate import: greenlet"]) == 0


def test_job_spark_python_task_linter_unhappy_path(
simple_ctx, make_job, make_random, make_cluster, make_notebook, make_directory
):
simple_ctx,
make_job,
make_random,
make_cluster,
make_notebook,
make_directory,
) -> None:
entrypoint = make_directory()

make_notebook(path=f"{entrypoint}/notebook.py", content=b"import greenlet")
Expand All @@ -479,11 +501,12 @@ def test_job_spark_python_task_linter_unhappy_path(
)
j = make_job(tasks=[task])

problems, _dfsas = simple_ctx.workflow_linter.lint_job(j.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(j.job_id)

assert len([problem for problem in problems if problem.message == "Could not locate import: greenlet"]) == 1


def test_workflow_linter_lints_python_wheel_task(simple_ctx, ws, make_job, make_random):
def test_workflow_linter_lints_python_wheel_task(simple_ctx, ws, make_job, make_random) -> None:
allow_list = create_autospec(KnownList) # databricks is in default list
allow_list.module_compatibility.return_value = UNKNOWN
allow_list.distribution_compatibility.return_value = UNKNOWN
Expand Down Expand Up @@ -511,7 +534,7 @@ def test_workflow_linter_lints_python_wheel_task(simple_ctx, ws, make_job, make_
)
job_with_ucx_library = make_job(tasks=[task])

problems, _dfsas = simple_ctx.workflow_linter.lint_job(job_with_ucx_library.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(job_with_ucx_library.job_id)

assert len([problem for problem in problems if problem.code == "library-dist-info-not-found"]) == 0
assert len([problem for problem in problems if problem.code == "library-entrypoint-not-found"]) == 0
Expand All @@ -537,7 +560,8 @@ def test_job_spark_python_task_workspace_linter_happy_path(
)
j = make_job(tasks=[task])

problems, _dfsas = simple_ctx.workflow_linter.lint_job(j.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(j.job_id)

assert not [problem for problem in problems if problem.message == "Could not locate import: greenlet"]


Expand All @@ -560,7 +584,8 @@ def test_job_spark_python_task_dbfs_linter_happy_path(
)
j = make_job(tasks=[task])

problems, _dfsas = simple_ctx.workflow_linter.lint_job(j.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(j.job_id)

assert not [problem for problem in problems if problem.message == "Could not locate import: greenlet"]


Expand Down Expand Up @@ -588,7 +613,8 @@ def test_job_spark_python_task_linter_notebook_handling(
)
j = make_job(tasks=[task])

problems, _dfsas = simple_ctx.workflow_linter.lint_job(j.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(j.job_id)

# The notebook being linted has 'import greenlet' in a cell that should be ignored, but will trigger this problem if processed.
assert not [problem for problem in problems if problem.message == "Could not locate import: greenlet"]

Expand All @@ -600,7 +626,7 @@ def test_job_dlt_task_linter_unhappy_path(
make_notebook,
make_directory,
make_pipeline,
):
) -> None:
entrypoint = make_directory()
make_notebook(path=f"{entrypoint}/notebook.py", content=b"import greenlet")
dlt_pipeline = make_pipeline(
Expand All @@ -613,7 +639,8 @@ def test_job_dlt_task_linter_unhappy_path(
)
j = make_job(tasks=[task])

problems, _dfsas = simple_ctx.workflow_linter.lint_job(j.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(j.job_id)

assert len([problem for problem in problems if problem.message == "Could not locate import: greenlet"]) == 1


Expand All @@ -624,7 +651,7 @@ def test_job_dlt_task_linter_happy_path(
make_notebook,
make_directory,
make_pipeline,
):
) -> None:
entrypoint = make_directory()
make_notebook(path=f"{entrypoint}/notebook.py", content=b"import greenlet")
dlt_pipeline = make_pipeline(
Expand All @@ -638,11 +665,12 @@ def test_job_dlt_task_linter_happy_path(
)
j = make_job(tasks=[task])

problems, _dfsas = simple_ctx.workflow_linter.lint_job(j.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(j.job_id)

assert len([problem for problem in problems if problem.message == "Could not locate import: greenlet"]) == 0


def test_job_dependency_problem_egg_dbr14plus(make_job, make_directory, simple_ctx, ws):
def test_job_dependency_problem_egg_dbr14plus(make_job, make_directory, simple_ctx, ws) -> None:
egg_file = Path(__file__).parent / "../../unit/source_code/samples/distribution/dist/thingy-0.0.1-py3.10.egg"
entrypoint = make_directory()
remote_egg_file = f"{entrypoint}/{egg_file.name}"
Expand All @@ -652,7 +680,7 @@ def test_job_dependency_problem_egg_dbr14plus(make_job, make_directory, simple_c

j = make_job(libraries=[library])

problems, _dfsas = simple_ctx.workflow_linter.lint_job(j.job_id)
problems, *_ = simple_ctx.workflow_linter.lint_job(j.job_id)
assert (
len(
[
Expand Down

0 comments on commit 9de04eb

Please sign in to comment.