Commit e3a514b

Support ipynb files without requiring conversion (#57)

Co-authored-by: Danyaal Masood <Danyaal.Masood@man.com>
Co-authored-by: Jon Bannister <jon.bannister@man.com>
3 people authored Nov 9, 2021
1 parent 21f897c commit e3a514b
Showing 12 changed files with 303 additions and 54 deletions.
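
The change teaches Notebooker to pick up .ipynb templates directly instead of requiring a jupytext .py conversion first. A minimal sketch of the new lookup behaviour, mirroring the _template() helper added in notebooker/utils/conversion.py below (the helper name resolve_template is illustrative, not part of the commit):

import os

def resolve_template(report_path: str, template_dir: str) -> str:
    # Prefer the .py template; fall back to an .ipynb file with the same name.
    py_path = os.path.join(template_dir, report_path + ".py")
    ipynb_path = os.path.join(template_dir, report_path + ".ipynb")
    return py_path if os.path.isfile(py_path) else ipynb_path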
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,9 @@
0.3.2 (2021-11-??)
------------------

* Feature: .ipynb files are now natively supported and can be used as Notebook Templates (#57)


0.3.1 (2021-10-29)
------------------

@@ -6,6 +12,7 @@
* Bugfix: Large notebooks were causing serialisation errors; now safely stored in gridfs.
* **Incompatibility**: Reports run with this version onwards will not be readable by older versions of Notebooker.


0.3.0 (2021-10-05)
------------------

74 changes: 74 additions & 0 deletions notebooker/notebook_templates_example/sample/plot_random_raw.ipynb
@@ -0,0 +1,74 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"lines_to_next_cell": 2
},
"source": [
"#Notebooker Test!"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"lines_to_next_cell": 0,
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"plots = 5\n",
"days = 100\n",
"start_date = \"2020-01-01\"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# -\n",
"arr = np.random.rand(days, plots) - 0.5\n",
"dts = np.array(start_date, dtype=np.datetime64) + np.arange(days)\n",
"df = pd.DataFrame(arr, index=dts)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# -\n",
"df.cumsum().plot()"
]
}
],
"metadata": {
"jupytext": {
"cell_metadata_json": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
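
The cell tagged "parameters" above is the one papermill overrides at run time once the template has been prepared. A minimal sketch of executing this template directly with papermill (output path and parameter values are illustrative):

import papermill as pm

pm.execute_notebook(
    "plot_random_raw.ipynb",           # the template shown above
    "plot_random_raw_output.ipynb",    # hypothetical output path
    parameters={"plots": 3, "days": 30, "start_date": "2021-06-01"},
)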
34 changes: 21 additions & 13 deletions notebooker/utils/conversion.py
@@ -66,25 +66,30 @@ def _git_pull_latest(repo: git.repo.Repo):
repo.git.pull("origin", "master")


def _python_template(report_path: AnyStr, py_template_dir: AnyStr) -> AnyStr:
file_name = "{}.py".format(report_path)
return os.path.join(py_template_dir, file_name)
def _template(report_path: str, py_template_dir: AnyStr) -> AnyStr:
py_path = os.path.join(py_template_dir, "{}.py".format(report_path))
ipynb_path = os.path.join(py_template_dir, "{}.ipynb".format(report_path))

if os.path.isfile(py_path):
return py_path

return ipynb_path


def _ipynb_output_path(template_base_dir: AnyStr, report_path: AnyStr, git_hex: AnyStr) -> AnyStr:
file_name = _output_ipynb_name(report_path)
return os.path.join(template_base_dir, git_hex, file_name)


def _get_python_template_path(report_path: str, warn_on_local: bool, py_template_dir) -> str:
def _get_template_path(report_path: str, warn_on_local: bool, py_template_dir: AnyStr) -> str:
if py_template_dir:
return _python_template(report_path, py_template_dir)
return _template(report_path, py_template_dir)
else:
if warn_on_local:
logger.warning(
"Loading from notebooker default templates. This is only expected if you are running locally."
)
return pkg_resources.resource_filename(__name__, "../notebook_templates_example/{}.py".format(report_path))
return _template(report_path, pkg_resources.resource_filename(__name__, "../notebook_templates_example"))


def _get_output_path_hex(notebooker_disable_git, py_template_dir) -> str:
@@ -126,22 +131,25 @@ def generate_ipynb_from_py(
Pulls the latest version of the notebook templates from git, and regenerates templates if there is a new HEAD
OR: finds the local template from the template repository using a relative path
In both cases, this method converts the .py file into an .ipynb file which can be executed by papermill.
Both .ipynb and .py report templates are handled, where .py templates are converted to .ipynb, which can
be executed by papermill
:param template_base_dir: The directory in which converted notebook templates reside.
:param report_name: The name of the report which we are running.
:param notebooker_disable_git: Whether or not to pull the latest version from git, if a change is available.
:param py_template_dir: The directory which contains raw python templates. This should be a subdir in a git repo.
:param py_template_dir: The directory which contains raw py/ipynb templates. This should be a subdir in a git repo.
:param warn_on_local: Whether to warn when we are searching for notebooks in the notebooker repo itself.
:return: The filepath of the .ipynb which we have just converted.
"""
report_path = convert_report_name_into_path(report_name)
python_template_path = _get_python_template_path(report_path, warn_on_local, py_template_dir)
template_path = _get_template_path(report_path, warn_on_local, py_template_dir)
output_template_path = _ipynb_output_path(
template_base_dir, report_path, _get_output_path_hex(notebooker_disable_git, py_template_dir)
)

mkdir_p(os.path.dirname(output_template_path))

try:
with open(output_template_path, "r") as f:
if f.read():
@@ -151,14 +159,14 @@ def generate_ipynb_from_py(
pass

# "touch" the output file
print("Creating ipynb at: %s", output_template_path)
mkdir_p(os.path.dirname(output_template_path))
with open(output_template_path, "w") as f:
print("Writing ipynb to: %s", output_template_path)
with open(output_template_path, "w"):
os.utime(output_template_path, None)

jupytext_nb = jupytext.read(python_template_path)
jupytext_nb = jupytext.read(template_path)
jupytext_nb["metadata"]["kernelspec"] = kernel_spec() # Override the kernel spec since we want to run it..
jupytext.write(jupytext_nb, output_template_path)

return output_template_path


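Because jupytext.read() accepts both .py and .ipynb sources, generate_ipynb_from_py needs no special casing beyond resolving the template path. A minimal sketch of the round-trip it relies on, assuming jupytext is installed (the file paths and kernelspec dict are illustrative):

import jupytext

nb = jupytext.read("templates/fake/ipynb_report.ipynb")  # works for .py templates too
nb["metadata"]["kernelspec"] = {"name": "python3", "language": "python", "display_name": "Python 3"}
jupytext.write(nb, "converted.ipynb")
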
2 changes: 1 addition & 1 deletion notebooker/utils/templates.py
@@ -18,7 +18,7 @@ def _valid_dirname(d):


def _valid_filename(f):
return f.endswith(".py") and "__init__" not in f and "__pycache__" not in f
return (f.endswith(".py") or f.endswith(".ipynb")) and "__init__" not in f and "__pycache__" not in f


def _get_parameters_cell_idx(notebook: nbformat.NotebookNode) -> Optional[int]:
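With the widened _valid_filename check, .ipynb files pass the template discovery filter while __init__.py and cache artefacts are still rejected. A few illustrative checks, assuming _valid_filename is imported from notebooker.utils.templates:

from notebooker.utils.templates import _valid_filename

assert _valid_filename("report.py")
assert _valid_filename("plot_random_raw.ipynb")
assert not _valid_filename("__init__.py")
assert not _valid_filename("notes.txt")
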
2 changes: 1 addition & 1 deletion notebooker/web/utils.py
@@ -54,7 +54,7 @@ def get_directory_structure(starting_point: Optional[str] = None) -> Dict[str, U
if not _valid_dirname(path):
continue
folders = path[start:].split(os.sep)
subdir = {os.sep.join(folders[1:] + [f.replace(".py", "")]): None for f in files if _valid_filename(f)}
subdir = {os.sep.join(folders[1:] + [f.replace(".ipynb", "").replace(".py", "")]): None for f in files if _valid_filename(f)}
parent = reduce(dict.get, folders[:-1], all_dirs)
parent[folders[-1]] = subdir
return all_dirs[rootdir[start:]]
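The key shown in the directory tree is the template filename with its extension stripped, so .py and .ipynb templates appear identically. A couple of illustrative checks of the doubled replace():

assert "py_report.py".replace(".ipynb", "").replace(".py", "") == "py_report"
assert "ipynb_report.ipynb".replace(".ipynb", "").replace(".py", "") == "ipynb_report"
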
79 changes: 76 additions & 3 deletions tests/integration/conftest.py
@@ -1,7 +1,7 @@
import git
import pytest

DUMMY_REPORT = """
DUMMY_REPORT_PY = """
# ---
# jupyter:
# celltoolbar: Tags
@@ -48,13 +48,86 @@
1/0
"""

DUMMY_REPORT_IPYNB = """
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline",
"import pandas as pd",
"import numpy as np",
"import random"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"n_points = random.choice(range(50, 1000))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"idx = pd.date_range('1/1/2000', periods=n_points)",
"df = pd.DataFrame(np.random.randn(n_points, 4), index=idx, columns=list('ABCD'))",
"df.plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"cumulative = df.cumsum()",
"cumulative.plot()"
]
}
],
"metadata": {
"celltoolbar": "Tags",
"jupytext": {
"cell_metadata_json": true,
"notebook_metadata_filter": "celltoolbar,jupytext_format_version"
},
"kernelspec": {
"display_name": "spark273",
"language": "python",
"name": "spark273"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
"""

@pytest.fixture
def setup_workspace(workspace):
(workspace.workspace + "/templates").mkdir()
git.Git(workspace.workspace).init()
(workspace.workspace + "/templates/fake").mkdir()
report_to_run = workspace.workspace + "/templates/fake/report.py"
report_to_run.write_lines(DUMMY_REPORT.split("\n"))

py_report_to_run = workspace.workspace + "/templates/fake/py_report.py"
py_report_to_run.write_lines(DUMMY_REPORT_PY.split("\n"))

ipynb_report_to_run = workspace.workspace + "/templates/fake/ipynb_report.ipynb"
ipynb_report_to_run.write_lines(DUMMY_REPORT_IPYNB.split("\n"))

report_to_run_failing = workspace.workspace + "/templates/fake/report_failing.py"
report_to_run_failing.write_lines(DUMMY_FAILING_REPORT.split("\n"))
32 changes: 25 additions & 7 deletions tests/integration/test_e2e.py
@@ -2,6 +2,7 @@
import datetime

import freezegun
import pytest

from notebooker.constants import JobStatus
from notebooker.web.routes.run_report import _rerun_report, run_report
@@ -29,12 +30,18 @@ def _check_report_output(job_id, serialiser, **kwargs):
assert getattr(result, k) == v, "Report output for attribute {} was incorrect!".format(k)


@pytest.mark.parametrize(
"report_name",
[
"fake/py_report",
"fake/ipynb_report"
],
)
@freezegun.freeze_time(datetime.datetime(2018, 1, 12))
def test_run_report(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace):
def test_run_report(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace, report_name):
with flask_app.app_context():
serialiser = get_serializer()
overrides = {"n_points": 5}
report_name = "fake/report"
report_title = "my report title"
mailto = ""
job_id = run_report(
@@ -59,7 +66,6 @@ def test_run_report(bson_library, flask_app, setup_and_cleanup_notebooker_filesy
assert job_id == serialiser.get_latest_successful_job_id_for_name_and_params(report_name, overrides)
assert job_id == serialiser.get_latest_successful_job_id_for_name_and_params(report_name, None)


@freezegun.freeze_time(datetime.datetime(2018, 1, 12))
def test_run_failing_report(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace):
with flask_app.app_context():
@@ -83,12 +89,18 @@ def test_run_failing_report(bson_library, flask_app, setup_and_cleanup_notebooke
assert result.stdout


@pytest.mark.parametrize(
"report_name",
[
"fake/py_report",
"fake/ipynb_report"
],
)
@freezegun.freeze_time(datetime.datetime(2018, 1, 12))
def test_run_report_and_rerun(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace):
def test_run_report_and_rerun(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace, report_name):
with flask_app.app_context():
serialiser = get_serializer()
overrides = {"n_points": 5}
report_name = "fake/report"
report_title = "my report title"
mailto = ""
job_id = run_report(
@@ -126,12 +138,18 @@ def test_run_report_and_rerun(bson_library, flask_app, setup_and_cleanup_noteboo
assert job_id != serialiser.get_latest_successful_job_id_for_name_and_params(report_name, overrides)


@pytest.mark.parametrize(
"report_name",
[
"fake/py_report",
"fake/ipynb_report"
],
)
@freezegun.freeze_time(datetime.datetime(2018, 1, 12))
def test_run_report_hide_code(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace):
def test_run_report_hide_code(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace, report_name):
with flask_app.app_context():
serialiser = get_serializer()
overrides = {"n_points": 5}
report_name = "fake/report"
report_title = "my report title"
mailto = ""
job_id = run_report(
4 changes: 3 additions & 1 deletion tests/integration/test_templates.py
@@ -4,4 +4,6 @@
def test_get_all_possible_templates(flask_app):
flask_app.config["PY_TEMPLATE_BASE_DIR"] = None
with flask_app.app_context():
assert get_all_possible_templates() == {"sample": {"sample/plot_random": None, "sample/test_plotly": None}}
assert get_all_possible_templates() == {
"sample": {"sample/plot_random": None, "sample/test_plotly": None, "sample/plot_random_raw": None}
}
2 changes: 1 addition & 1 deletion tests/integration/web/test_core_routes.py
@@ -9,7 +9,7 @@ def test_create_schedule(flask_app, setup_workspace):
)
assert rv.status_code == 200
data = json.loads(rv.data)
assert data == {"result": ["fake/report", "fake/report_failing"]}
assert data == {"result": ["fake/py_report", "fake/ipynb_report", "fake/report_failing"]}


def test_version_number(flask_app, setup_workspace):