diff --git a/CHANGELOG.md b/CHANGELOG.md index e1919b3b..57be76b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +0.3.2 (2021-11-??) +------------------ + +* Feature: .ipynb files are now natively supported and can be used as Notebook Templates (#57) + + 0.3.1 (2021-10-29) ------------------ @@ -6,6 +12,7 @@ * Bugfix: Large notebooks were causing serialisation errors; now safely stored in gridfs. * **Incompatibility**: Reports run with this version onwards will not be readable by older versions of Notebooker. + 0.3.0 (2021-10-05) ------------------ diff --git a/notebooker/notebook_templates_example/sample/plot_random_raw.ipynb b/notebooker/notebook_templates_example/sample/plot_random_raw.ipynb new file mode 100644 index 00000000..49465b2b --- /dev/null +++ b/notebooker/notebook_templates_example/sample/plot_random_raw.ipynb @@ -0,0 +1,74 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "#Notebooker Test!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "lines_to_next_cell": 0, + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "plots = 5\n", + "days = 100\n", + "start_date = \"2020-01-01\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# -\n", + "arr = np.random.rand(days, plots) - 0.5\n", + "dts = np.array(start_date, dtype=np.datetime64) + np.arange(days)\n", + "df = pd.DataFrame(arr, index=dts)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# -\n", + "df.cumsum().plot()" + ] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_json": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooker/utils/conversion.py b/notebooker/utils/conversion.py index 20f424c6..d4f14156 100644 --- a/notebooker/utils/conversion.py +++ b/notebooker/utils/conversion.py @@ -66,9 +66,14 @@ def _git_pull_latest(repo: git.repo.Repo): repo.git.pull("origin", "master") -def _python_template(report_path: AnyStr, py_template_dir: AnyStr) -> AnyStr: - file_name = "{}.py".format(report_path) - return os.path.join(py_template_dir, file_name) +def _template(report_path: str, py_template_dir: AnyStr) -> AnyStr: + py_path = os.path.join(py_template_dir, "{}.py".format(report_path)) + ipynb_path = os.path.join(py_template_dir, "{}.ipynb".format(report_path)) + + if os.path.isfile(py_path): + return py_path + + return ipynb_path def _ipynb_output_path(template_base_dir: AnyStr, report_path: AnyStr, git_hex: AnyStr) -> AnyStr: @@ -76,15 +81,15 @@ def _ipynb_output_path(template_base_dir: AnyStr, report_path: AnyStr, git_hex: return os.path.join(template_base_dir, git_hex, file_name) -def _get_python_template_path(report_path: str, warn_on_local: bool, py_template_dir) -> str: +def _get_template_path(report_path: str, warn_on_local: bool, py_template_dir: AnyStr) -> str: if py_template_dir: - return _python_template(report_path, py_template_dir) + return _template(report_path, py_template_dir) else: if warn_on_local: logger.warning( "Loading from notebooker default templates. This is only expected if you are running locally." ) - return pkg_resources.resource_filename(__name__, "../notebook_templates_example/{}.py".format(report_path)) + return _template(report_path, pkg_resources.resource_filename(__name__, "../notebook_templates_example")) def _get_output_path_hex(notebooker_disable_git, py_template_dir) -> str: @@ -126,22 +131,25 @@ def generate_ipynb_from_py( Pulls the latest version of the notebook templates from git, and regenerates templates if there is a new HEAD OR: finds the local template from the template repository using a relative path - In both cases, this method converts the .py file into an .ipynb file which can be executed by papermill. + Both .ipynb and .py report templates are handled, where .py templates are converted to .ipynb, which can + be executed by papermill :param template_base_dir: The directory in which converted notebook templates reside. :param report_name: The name of the report which we are running. :param notebooker_disable_git: Whether or not to pull the latest version from git, if a change is available. - :param py_template_dir: The directory which contains raw python templates. This should be a subdir in a git repo. + :param py_template_dir: The directory which contains raw py/ipynb templates. This should be a subdir in a git repo. :param warn_on_local: Whether to warn when we are searching for notebooks in the notebooker repo itself. :return: The filepath of the .ipynb which we have just converted. """ report_path = convert_report_name_into_path(report_name) - python_template_path = _get_python_template_path(report_path, warn_on_local, py_template_dir) + template_path = _get_template_path(report_path, warn_on_local, py_template_dir) output_template_path = _ipynb_output_path( template_base_dir, report_path, _get_output_path_hex(notebooker_disable_git, py_template_dir) ) + mkdir_p(os.path.dirname(output_template_path)) + try: with open(output_template_path, "r") as f: if f.read(): @@ -151,14 +159,14 @@ def generate_ipynb_from_py( pass # "touch" the output file - print("Creating ipynb at: %s", output_template_path) - mkdir_p(os.path.dirname(output_template_path)) - with open(output_template_path, "w") as f: + print("Writing ipynb to: %s", output_template_path) + with open(output_template_path, "w"): os.utime(output_template_path, None) - jupytext_nb = jupytext.read(python_template_path) + jupytext_nb = jupytext.read(template_path) jupytext_nb["metadata"]["kernelspec"] = kernel_spec() # Override the kernel spec since we want to run it.. jupytext.write(jupytext_nb, output_template_path) + return output_template_path diff --git a/notebooker/utils/templates.py b/notebooker/utils/templates.py index 3121755f..922182c9 100644 --- a/notebooker/utils/templates.py +++ b/notebooker/utils/templates.py @@ -18,7 +18,7 @@ def _valid_dirname(d): def _valid_filename(f): - return f.endswith(".py") and "__init__" not in f and "__pycache__" not in f + return (f.endswith(".py") or f.endswith(".ipynb")) and "__init__" not in f and "__pycache__" not in f def _get_parameters_cell_idx(notebook: nbformat.NotebookNode) -> Optional[int]: diff --git a/notebooker/web/utils.py b/notebooker/web/utils.py index 30da544b..1699f377 100644 --- a/notebooker/web/utils.py +++ b/notebooker/web/utils.py @@ -54,7 +54,7 @@ def get_directory_structure(starting_point: Optional[str] = None) -> Dict[str, U if not _valid_dirname(path): continue folders = path[start:].split(os.sep) - subdir = {os.sep.join(folders[1:] + [f.replace(".py", "")]): None for f in files if _valid_filename(f)} + subdir = {os.sep.join(folders[1:] + [f.replace(".ipynb", "").replace(".py", "")]): None for f in files if _valid_filename(f)} parent = reduce(dict.get, folders[:-1], all_dirs) parent[folders[-1]] = subdir return all_dirs[rootdir[start:]] diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 5bc5c4f7..88affd03 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,7 +1,7 @@ import git import pytest -DUMMY_REPORT = """ +DUMMY_REPORT_PY = """ # --- # jupyter: # celltoolbar: Tags @@ -48,13 +48,86 @@ 1/0 """ +DUMMY_REPORT_IPYNB = """ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline", + "import pandas as pd", + "import numpy as np", + "import random" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "n_points = random.choice(range(50, 1000))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "idx = pd.date_range('1/1/2000', periods=n_points)", + "df = pd.DataFrame(np.random.randn(n_points, 4), index=idx, columns=list('ABCD'))", + "df.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "cumulative = df.cumsum()", + "cumulative.plot()" + ] + } + ], + "metadata": { + "celltoolbar": "Tags", + "jupytext": { + "cell_metadata_json": true, + "notebook_metadata_filter": "celltoolbar,jupytext_format_version" + }, + "kernelspec": { + "display_name": "spark273", + "language": "python", + "name": "spark273" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} +""" @pytest.fixture def setup_workspace(workspace): (workspace.workspace + "/templates").mkdir() git.Git(workspace.workspace).init() (workspace.workspace + "/templates/fake").mkdir() - report_to_run = workspace.workspace + "/templates/fake/report.py" - report_to_run.write_lines(DUMMY_REPORT.split("\n")) + + py_report_to_run = workspace.workspace + "/templates/fake/py_report.py" + py_report_to_run.write_lines(DUMMY_REPORT_PY.split("\n")) + + ipynb_report_to_run = workspace.workspace + "/templates/fake/ipynb_report.ipynb" + ipynb_report_to_run.write_lines(DUMMY_REPORT_IPYNB.split("\n")) + report_to_run_failing = workspace.workspace + "/templates/fake/report_failing.py" report_to_run_failing.write_lines(DUMMY_FAILING_REPORT.split("\n")) diff --git a/tests/integration/test_e2e.py b/tests/integration/test_e2e.py index 7bd380ad..ff977ace 100644 --- a/tests/integration/test_e2e.py +++ b/tests/integration/test_e2e.py @@ -2,6 +2,7 @@ import datetime import freezegun +import pytest from notebooker.constants import JobStatus from notebooker.web.routes.run_report import _rerun_report, run_report @@ -29,12 +30,18 @@ def _check_report_output(job_id, serialiser, **kwargs): assert getattr(result, k) == v, "Report output for attribute {} was incorrect!".format(k) +@pytest.mark.parametrize( + "report_name", + [ + "fake/py_report", + "fake/ipynb_report" + ], +) @freezegun.freeze_time(datetime.datetime(2018, 1, 12)) -def test_run_report(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace): +def test_run_report(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace, report_name): with flask_app.app_context(): serialiser = get_serializer() overrides = {"n_points": 5} - report_name = "fake/report" report_title = "my report title" mailto = "" job_id = run_report( @@ -59,7 +66,6 @@ def test_run_report(bson_library, flask_app, setup_and_cleanup_notebooker_filesy assert job_id == serialiser.get_latest_successful_job_id_for_name_and_params(report_name, overrides) assert job_id == serialiser.get_latest_successful_job_id_for_name_and_params(report_name, None) - @freezegun.freeze_time(datetime.datetime(2018, 1, 12)) def test_run_failing_report(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace): with flask_app.app_context(): @@ -83,12 +89,18 @@ def test_run_failing_report(bson_library, flask_app, setup_and_cleanup_notebooke assert result.stdout +@pytest.mark.parametrize( + "report_name", + [ + "fake/py_report", + "fake/ipynb_report" + ], +) @freezegun.freeze_time(datetime.datetime(2018, 1, 12)) -def test_run_report_and_rerun(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace): +def test_run_report_and_rerun(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace, report_name): with flask_app.app_context(): serialiser = get_serializer() overrides = {"n_points": 5} - report_name = "fake/report" report_title = "my report title" mailto = "" job_id = run_report( @@ -126,12 +138,18 @@ def test_run_report_and_rerun(bson_library, flask_app, setup_and_cleanup_noteboo assert job_id != serialiser.get_latest_successful_job_id_for_name_and_params(report_name, overrides) +@pytest.mark.parametrize( + "report_name", + [ + "fake/py_report", + "fake/ipynb_report" + ], +) @freezegun.freeze_time(datetime.datetime(2018, 1, 12)) -def test_run_report_hide_code(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace): +def test_run_report_hide_code(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace, report_name): with flask_app.app_context(): serialiser = get_serializer() overrides = {"n_points": 5} - report_name = "fake/report" report_title = "my report title" mailto = "" job_id = run_report( diff --git a/tests/integration/test_templates.py b/tests/integration/test_templates.py index 4d474492..09524972 100644 --- a/tests/integration/test_templates.py +++ b/tests/integration/test_templates.py @@ -4,4 +4,6 @@ def test_get_all_possible_templates(flask_app): flask_app.config["PY_TEMPLATE_BASE_DIR"] = None with flask_app.app_context(): - assert get_all_possible_templates() == {"sample": {"sample/plot_random": None, "sample/test_plotly": None}} + assert get_all_possible_templates() == { + "sample": {"sample/plot_random": None, "sample/test_plotly": None, "sample/plot_random_raw": None} + } diff --git a/tests/integration/web/test_core_routes.py b/tests/integration/web/test_core_routes.py index 36f70f61..da6bc475 100644 --- a/tests/integration/web/test_core_routes.py +++ b/tests/integration/web/test_core_routes.py @@ -9,7 +9,7 @@ def test_create_schedule(flask_app, setup_workspace): ) assert rv.status_code == 200 data = json.loads(rv.data) - assert data == {"result": ["fake/report", "fake/report_failing"]} + assert data == {"result": ["fake/py_report", "fake/ipynb_report", "fake/report_failing"]} def test_version_number(flask_app, setup_workspace): diff --git a/tests/integration/web/test_scheduling.py b/tests/integration/web/test_scheduling.py index 7c039e05..5a595bdb 100644 --- a/tests/integration/web/test_scheduling.py +++ b/tests/integration/web/test_scheduling.py @@ -1,15 +1,23 @@ import json import mock import time +import pytest -def test_create_schedule(flask_app, setup_workspace): +@pytest.mark.parametrize( + "report_name", + [ + "fake/py_report", + "fake/ipynb_report" + ], +) +def test_create_schedule(flask_app, setup_workspace, report_name): with flask_app.test_client() as client: rv = client.post( - "/scheduler/create/fake/report", + f"/scheduler/create/{report_name}", data={ "report_title": "test2", - "report_name": "fake/report", + "report_name": report_name, "overrides": "", "mailto": "", "generate_pdf": True, @@ -21,16 +29,16 @@ def test_create_schedule(flask_app, setup_workspace): assert data.pop("next_run_time") assert data == { "cron_schedule": "* * * * *", - "delete_url": "/scheduler/fake/report_test2", - "id": "fake/report_test2", + "delete_url": f"/scheduler/{report_name}_test2", + "id": f"{report_name}_test2", "params": { "generate_pdf": True, "hide_code": False, "mailto": "", "overrides": "", - "report_name": "fake/report", + "report_name": report_name, "report_title": "test2", - "scheduler_job_id": "fake/report_test2", + "scheduler_job_id": f"{report_name}_test2", }, "trigger": { "fields": { @@ -47,13 +55,20 @@ def test_create_schedule(flask_app, setup_workspace): } -def test_scheduler_handles_booleans_properly(flask_app, setup_workspace): +@pytest.mark.parametrize( + "report_name", + [ + "fake/py_report", + "fake/ipynb_report" + ], +) +def test_scheduler_handles_booleans_properly(flask_app, setup_workspace, report_name): with flask_app.test_client() as client: rv = client.post( - "/scheduler/create/fake/report", + f"/scheduler/create/{report_name}", data={ "report_title": "test2", - "report_name": "fake/report", + "report_name": report_name, "overrides": "", "mailto": "", "generate_pdf": True, @@ -83,13 +98,20 @@ def test_create_schedule_bad_report_name(flask_app, setup_workspace): assert rv.status_code == 404 -def test_list_scheduled_jobs(flask_app, setup_workspace): +@pytest.mark.parametrize( + "report_name", + [ + "fake/py_report", + "fake/ipynb_report" + ], +) +def test_list_scheduled_jobs(flask_app, setup_workspace, report_name): with flask_app.test_client() as client: rv = client.post( - "/scheduler/create/fake/report", + f"/scheduler/create/{report_name}", data={ "report_title": "test2", - "report_name": "fake/report", + "report_name": report_name, "overrides": "", "mailto": "", "cron_schedule": "* * * * *", @@ -101,16 +123,23 @@ def test_list_scheduled_jobs(flask_app, setup_workspace): assert rv.status_code == 200 jobs = json.loads(rv.data) assert len(jobs) == 1 - assert jobs[0]["id"] == "fake/report_test2" + assert jobs[0]["id"] == f"{report_name}_test2" -def test_delete_scheduled_jobs(flask_app, setup_workspace): +@pytest.mark.parametrize( + "report_name", + [ + "fake/py_report", + "fake/ipynb_report" + ], +) +def test_delete_scheduled_jobs(flask_app, setup_workspace, report_name): with flask_app.test_client() as client: rv = client.post( - "/scheduler/create/fake/report", + f"/scheduler/create/{report_name}", data={ "report_title": "test2", - "report_name": "fake/report", + "report_name": report_name, "overrides": "", "mailto": "", "cron_schedule": "* * * * *", @@ -122,7 +151,7 @@ def test_delete_scheduled_jobs(flask_app, setup_workspace): assert rv.status_code == 200 assert len(json.loads(rv.data)) == 1 - rv = client.delete("/scheduler/fake/report_test2") + rv = client.delete(f"/scheduler/{report_name}_test2") assert rv.status_code == 200 rv = client.get("/scheduler/jobs") @@ -130,7 +159,14 @@ def test_delete_scheduled_jobs(flask_app, setup_workspace): assert len(json.loads(rv.data)) == 0 -def test_scheduler_runs_notebooks(flask_app, setup_workspace): +@pytest.mark.parametrize( + "report_name", + [ + "fake/py_report", + "fake/ipynb_report" + ], +) +def test_scheduler_runs_notebooks(flask_app, setup_workspace, report_name): with flask_app.test_client() as client: def fake_post(url, params): @@ -143,10 +179,10 @@ def fake_post(url, params): assert len(json.loads(rv.data)) == 0 rv = client.post( - "/scheduler/create/fake/report", + f"/scheduler/create/{report_name}", data={ "report_title": "test2", - "report_name": "fake/report", + "report_name": report_name, "overrides": "", "mailto": "", "cron_schedule": "* * * * *", diff --git a/tests/unit/utils/test_conversion.py b/tests/unit/utils/test_conversion.py index 7dc60137..8061c398 100644 --- a/tests/unit/utils/test_conversion.py +++ b/tests/unit/utils/test_conversion.py @@ -4,6 +4,7 @@ import tempfile import uuid +import pytest import mock from click.testing import CliRunner @@ -13,14 +14,41 @@ from notebooker.utils.conversion import _output_ipynb_name -def test_generate_ipynb_from_py(setup_and_cleanup_notebooker_filesystem, webapp_config, flask_app): +@pytest.mark.parametrize( + "file_type", + [ + "py", + "ipynb", + ], +) +def test_generate_ipynb_from_py(file_type, setup_and_cleanup_notebooker_filesystem, webapp_config, flask_app): python_dir = webapp_config.PY_TEMPLATE_BASE_DIR with flask_app.app_context(): set_cache("latest_sha", "fake_sha_early") os.mkdir(python_dir + "/extra_path") - with open(os.path.join(python_dir, "extra_path", "test_report.py"), "w") as f: - f.write("#hello world\n") + with open(os.path.join(python_dir, "extra_path", f"test_report.{file_type}"), "w") as f: + if file_type == "py": + f.write("#hello world\n") + elif file_type == "ipynb": + f.write( + json.dumps( + { + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": ["import datetime"], + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 2, + } + ) + ) report_path = os.sep.join(["extra_path", "test_report"]) with mock.patch("notebooker.utils.conversion.git.repo.Repo") as repo: repo().commit().hexsha = "fake_sha_early" @@ -38,10 +66,10 @@ def test_generate_ipynb_from_py(setup_and_cleanup_notebooker_filesystem, webapp_ uuid_1 = uuid.UUID(int=12345) uuid_2 = uuid.UUID(int=67890) with mock.patch("notebooker.utils.conversion.uuid.uuid4") as uuid4: - with mock.patch("notebooker.utils.conversion.pkg_resources.resource_filename") as resource_filename: + with mock.patch("notebooker.utils.conversion._template") as template: conversion.python_template_dir = lambda *a, **kw: None uuid4.return_value = uuid_1 - resource_filename.return_value = python_dir + "/extra_path/test_report.py" + template.return_value = python_dir + f"/extra_path/test_report.{file_type}" conversion.generate_ipynb_from_py(python_dir, "extra_path/test_report", False, py_template_dir="") expected_ipynb_path = os.path.join(python_dir, str(uuid_1), expected_filename) diff --git a/tests/unit/utils/test_templates.py b/tests/unit/utils/test_templates.py index e98ded7d..87cbdb02 100644 --- a/tests/unit/utils/test_templates.py +++ b/tests/unit/utils/test_templates.py @@ -17,6 +17,8 @@ def test_get_directory_structure(): "depth/2.py", "this/is/deep.py", "this/report.py", + "hello_again.ipynb", + "depth/3.ipynb", ] for path in paths: abspath = os.path.join(temp_dir, path) @@ -27,8 +29,9 @@ def test_get_directory_structure(): expected_structure = { "hello": None, "goodbye": None, - "depth": {"depth/1": None, "depth/2": None}, + "depth": {"depth/1": None, "depth/2": None, "depth/3": None}, "this": {"this/report": None, "is": {"this/is/deep": None, "very": {"this/is/very/deep": None}}}, + "hello_again": None, } assert get_directory_structure(temp_dir) == expected_structure