Skip to content

Commit

Permalink
Merge pull request #1 from bmorris3/monitor-nb-output
Browse files Browse the repository at this point in the history
Optional notebook output with interleaved runtime and screenshots
  • Loading branch information
astrofrog authored Nov 13, 2024
2 parents 937257c + a40f822 commit 7d4b985
Show file tree
Hide file tree
Showing 8 changed files with 177 additions and 17 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ dist
build
.ipynb_checkpoints
__pycache__
output-2*
jupyter_output_monitor/_version.py
jupyter_output_monitor/__pycache__/*
15 changes: 13 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ The R and G values should be kept as (143, 56), and the B color should be unique

Then, to run the notebook and monitor the changes in widget output, run:

jupyter-output-monitor --notebook mynotebook.ipynb
jupyter-output-monitor monitor --notebook mynotebook.ipynb

Where ``mynotebook.ipynb`` is the name of your notebook. By default, this will
open a window showing you what is happening, but you can also pass ``--headless``
Expand All @@ -36,7 +36,7 @@ to run in headless mode.
If you want to test this on an existing Jupyter Lab instance, including
remote ones, you can use ``--url`` instead of ``--notebook``:

jupyter-output-monitor http://localhost:8987/lab/tree/notebook.ipynb?token=7bb9a...
jupyter-output-monitor monitor --url http://localhost:8987/lab/tree/notebook.ipynb?token=7bb9a...

Note that the URL should include the path to the notebook, and will likely
require the token too.
Expand Down Expand Up @@ -123,3 +123,14 @@ after the previous one. This is 10s by default but can be customized with
``--wait-after-execute=20`` for example. You should set this value so that the
cell that takes the longest to fully execute will be expected to take less than
this time.

## Generating a report

You can generate a copy of the input notebook with output screenshots and profiling
results inserted by using e.g.:

jupyter-output-monitor report --notebook mynotebook.ipynb --results-dir=output

Where ``--results-dir`` is the output directory generated with the ``monitor``
command. By default, this will write a ``report.ipynb`` notebook, but you can
override the filename with ``--output-report-name``.
4 changes: 3 additions & 1 deletion jupyter_output_monitor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from .__main__ import main
from ._monitor import monitor
from ._report import report
from ._version import __version__

__all__ = ["monitor", "__version__"]
__all__ = ["monitor", "report", "__version__", "main"]
9 changes: 7 additions & 2 deletions jupyter_output_monitor/__main__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
from ._monitor import monitor
import click

from ._monitor import monitor_group
from ._report import report_group

main = click.CommandCollection(sources=[monitor_group, report_group])

if __name__ == "__main__":
monitor()
main()
25 changes: 15 additions & 10 deletions jupyter_output_monitor/_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,21 @@
from ._server import jupyter_server
from ._utils import clear_notebook, isotime

__all__ = ["monitor", "monitor_group"]

RG_SPECIAL = (143, 56)


@click.command()
def iso_to_path(time):
    """Return the ISO timestamp string *time* with every ':' replaced by '-'.

    Colons are invalid in filenames on Windows, so timestamps that end up in
    file or directory names are passed through this helper first.
    """
    pieces = time.split(":")
    return "-".join(pieces)


# Empty click group whose only purpose is to host the ``monitor`` subcommand
# (registered via ``@monitor_group.command()``). It is documented with
# comments rather than a docstring because click would display a docstring
# as the group's help text.
@click.group()
def monitor_group():
    pass


@monitor_group.command()
@click.option(
"--notebook",
default=None,
Expand All @@ -42,7 +53,7 @@
@click.option("--headless", is_flag=True, help="Whether to run in headless mode")
def monitor(notebook, url, output, wait_after_execute, headless):
if output is None:
output = f"output-{isotime()}"
output = f"output-{iso_to_path(isotime())}"

if os.path.exists(output):
print(f"Output directory {output} already exists")
Expand Down Expand Up @@ -124,12 +135,9 @@ def _monitor_output(url, output, wait_after_execute, headless):

timestamp = isotime()

# Colons are invalid in filenames on Windows
filename_timestamp = timestamp.replace(":", "-")

screenshot_filename = os.path.join(
output,
f"input-{input_index:03d}-{filename_timestamp}.png",
f"input-{input_index:03d}-{iso_to_path(timestamp)}.png",
)
image = Image.open(BytesIO(screenshot_bytes))
image.save(screenshot_filename)
Expand Down Expand Up @@ -192,12 +200,9 @@ def _monitor_output(url, output, wait_after_execute, headless):

timestamp = isotime()

# Colons are invalid in filenames on Windows
filename_timestamp = timestamp.replace(":", "-")

screenshot_filename = os.path.join(
output,
f"output-{output_index:03d}-{filename_timestamp}.png",
f"output-{output_index:03d}-{iso_to_path(timestamp)}.png",
)
image = Image.open(BytesIO(screenshot_bytes))
image.save(screenshot_filename)
Expand Down
116 changes: 116 additions & 0 deletions jupyter_output_monitor/_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import csv
import datetime
import os

import click
import nbformat

__all__ = ["report", "report_group"]


# Empty click group whose only purpose is to host the ``report`` subcommand
# (registered via ``@report_group.command()``). It is documented with
# comments rather than a docstring because click would display a docstring
# as the group's help text.
@click.group()
def report_group():
    pass


@report_group.command()
@click.option(
    "--notebook",
    default=None,
    help="The notebook that was profiled.",
)
@click.option(
    "--results-dir",
    default=None,
    help="Output results directory from the profiling",
)
@click.option(
    "--output-report-name",
    default="report.ipynb",
    help="Write a copy of the notebook containing screenshots and profiling results to a notebook with the specified name, in the results directory",
)
def report(notebook, results_dir, output_report_name):
    """Write a copy of a profiled notebook annotated with screenshots and timings."""
    # NOTE: click shows the docstring above as this command's --help text, so
    # the detailed documentation lives in comments instead.
    #
    # Parameters
    #   notebook           -- path of the notebook that was profiled.
    #   results_dir        -- directory containing ``event_log.csv`` (read with
    #                         the columns ``time``, ``index``, ``event`` and
    #                         ``screenshot``); the report is written here too.
    #   output_report_name -- file name of the generated notebook inside
    #                         ``results_dir``.
    #
    # Raises
    #   click.UsageError     -- if ``--notebook`` or ``--results-dir`` is missing.
    #   click.ClickException -- if the event log contains no rows.

    # Fail with a readable message instead of a TypeError from os.path.join /
    # nbformat when a mandatory option was not supplied.
    if notebook is None:
        raise click.UsageError("Missing option '--notebook'.")
    if results_dir is None:
        raise click.UsageError("Missing option '--results-dir'.")

    log_path = os.path.join(results_dir, "event_log.csv")
    # newline="" is what the csv module expects so that it can handle line
    # endings (including ones embedded in quoted fields) itself.
    with open(log_path, newline="") as csvfile:
        log = list(csv.DictReader(csvfile))

    if not log:
        raise click.ClickException(f"No events found in {log_path}")

    # convert ISO times to elapsed seconds since the first logged event:
    start_time = datetime.datetime.fromisoformat(log[0]["time"])
    for row in log:
        elapsed = datetime.datetime.fromisoformat(row["time"]) - start_time
        row["time"] = elapsed.total_seconds()

    results = {}
    last_executed_cell = None

    # group timing results by executed cell; every output change is
    # attributed to the most recently executed cell
    for row in log:
        index = row["index"]
        event = row["event"]

        if index not in results and event == "execute-input":
            results[index] = {
                "execute-input": row,
                "output-changed": [],
            }
            last_executed_cell = index

        elif event == "output-changed":
            if last_executed_cell is None:
                # An output changed before any cell was executed: there is
                # nothing to attribute it to, so ignore it rather than crash.
                continue
            row["output_from_cell"] = last_executed_cell
            row["dt"] = (
                row["time"] - results[last_executed_cell]["execute-input"]["time"]
            )
            results[last_executed_cell][event].append(row)

    # compute "final" timing results per executed cell
    for result in results.values():
        updates = result["output-changed"]
        result["total"] = updates[-1]["dt"] if updates else None
        result["n_updates"] = len(updates) if updates else None

    # assemble annotations in markdown format for each executed code cell:
    markdown_annotations = []
    for idx, result in results.items():
        if result["output-changed"]:
            # The report is written next to the screenshots, so referencing
            # the bare file name is sufficient.
            screenshot_path = os.path.basename(
                result["output-changed"][-1]["screenshot"],
            )
            markdown_annotations.append(
                f"![output screenshot]({screenshot_path})\n\n"
                f"#### Profiling result for cell {idx}: \n * {result['total']:.2f} seconds "
                f"elapsed\n * {result['n_updates']:d} output updates\n",
            )
        else:
            markdown_annotations.append(
                f"#### Profiling result for cell {idx}: \nNo output.\n",
            )

    # read in the source notebook:
    nb = nbformat.read(notebook, nbformat.NO_CONVERT)

    # create new list of cells, weaving together the existing cells and the
    # new markdown cells with profiling results and screenshots; the n-th
    # non-empty code cell receives the n-th annotation
    new_cells = []
    annotations = iter(markdown_annotations)
    for cell in nb["cells"]:
        new_cells.append(cell)
        if cell["cell_type"] == "code" and len(cell["source"]):
            # The log may cover fewer cells than the notebook contains;
            # such cells are simply left without an annotation.
            annotation = next(annotations, None)
            if annotation is not None:
                new_cells.append(nbformat.v4.new_markdown_cell(annotation))

    nb["cells"] = new_cells

    output_notebook = os.path.join(results_dir, output_report_name)

    print(f"Writing notebook with profiling results to: {output_notebook}")

    new_notebook = nbformat.from_dict(nb)
    nbformat.write(new_notebook, output_notebook)
17 changes: 17 additions & 0 deletions jupyter_output_monitor/tests/test_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def test_simple(tmp_path):
sys.executable,
"-m",
"jupyter_output_monitor",
"monitor",
"--notebook",
str(DATA / "simple.ipynb"),
"--output",
Expand Down Expand Up @@ -40,3 +41,19 @@ def test_simple(tmp_path):
with open(output_path / "event_log.csv") as f:
reader = csv.reader(f, delimiter=",")
assert len(list(reader)) == 10

subprocess.run(
[
sys.executable,
"-m",
"jupyter_output_monitor",
"report",
"--notebook",
str(DATA / "simple.ipynb"),
"--results-dir",
str(output_path),
],
check=True,
)

assert (output_path / "report.ipynb").exists()
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ dependencies = [
"click",
"pillow",
"playwright",
"solara[pytest]"
"solara[pytest]",
"nbformat",
]
dynamic = ["version"]

Expand All @@ -26,7 +27,7 @@ requires = ["setuptools",
build-backend = 'setuptools.build_meta'

[project.scripts]
jupyter-output-monitor = "jupyter_output_monitor:monitor"
jupyter-output-monitor = "jupyter_output_monitor:main"

[tool.setuptools]
zip-safe = false
Expand Down

0 comments on commit 7d4b985

Please sign in to comment.