Merge pull request #1 from bmorris3/monitor-nb-output

Optional notebook output with interleaved runtime and screenshots
glue-viz · Nov 13, 2024 · 7d4b985 · 7d4b985
2 parents 937257c + a40f822
commit 7d4b985
Show file tree

Hide file tree

Showing 8 changed files with 177 additions and 17 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,3 +5,6 @@ dist
 build
 .ipynb_checkpoints
 __pycache__
+output-2*
+jupyter_output_monitor/_version.py
+jupyter_output_monitor/__pycache__/*
diff --git a/README.md b/README.md
@@ -25,7 +25,7 @@ The R and G values should be kept as (143, 56), and the B color should be unique
 
 Then, to run the notebook and monitor the changes in widget output, run:
 
-    jupyter-output-monitor --notebook mynotebook.ipynb
+    jupyter-output-monitor monitor --notebook mynotebook.ipynb
 
 Where ``mynotebook.ipynb`` is the name of your notebook. By default, this will
 open a window showing you what is happening, but you can also pass ``--headless``
@@ -36,7 +36,7 @@ to run in headless mode.
 If you want to test this on an existing Jupyter Lab instance, including
 remote ones, you can use ``--url`` instead of ``--notebook``:
 
-    jupyter-output-monitor http://localhost:8987/lab/tree/notebook.ipynb?token=7bb9a...
+    jupyter-output-monitor monitor --url http://localhost:8987/lab/tree/notebook.ipynb?token=7bb9a...
 
 Note that the URL should include the path to the notebook, and will likely
 require the token too.
@@ -123,3 +123,14 @@ after the previous one. This is 10s by default but can be customized with
 ``--wait-after-execute=20`` for example. You should set this value so that the
 cell that takes the longest to fully execute will be expected to take less than
 this time.
+
+## Generating a report
+
+You can generate a copy of the input notebook with output screenshots and profiling
+results inserted by using e.g.:
+
+    jupyter-output-monitor report --notebook mynotebook.ipynb --results-dir=output
+
+Where ``--results-dir`` is the output directory generated by the ``monitor``
+command. By default, this will write a ``report.ipynb`` notebook, but you can
+override the filename with ``--output-report-name``.
diff --git a/jupyter_output_monitor/__init__.py b/jupyter_output_monitor/__init__.py
@@ -1,4 +1,6 @@
+from .__main__ import main
 from ._monitor import monitor
+from ._report import report
 from ._version import __version__
 
-__all__ = ["monitor", "__version__"]
+__all__ = ["monitor", "report", "__version__", "main"]
diff --git a/jupyter_output_monitor/__main__.py b/jupyter_output_monitor/__main__.py
@@ -1,4 +1,9 @@
-from ._monitor import monitor
+import click
+
+from ._monitor import monitor_group
+from ._report import report_group
+
+main = click.CommandCollection(sources=[monitor_group, report_group])
 
 if __name__ == "__main__":
-    monitor()
+    main()
diff --git a/jupyter_output_monitor/_monitor.py b/jupyter_output_monitor/_monitor.py
@@ -15,10 +15,21 @@
 from ._server import jupyter_server
 from ._utils import clear_notebook, isotime
 
+__all__ = ["monitor", "monitor_group"]
+
 RG_SPECIAL = (143, 56)
 
 
-@click.command()
+def iso_to_path(time):
+    return time.replace(":", "-")
+
+
+@click.group()
+def monitor_group():
+    pass
+
+
+@monitor_group.command()
 @click.option(
     "--notebook",
     default=None,
@@ -42,7 +53,7 @@
 @click.option("--headless", is_flag=True, help="Whether to run in headless mode")
 def monitor(notebook, url, output, wait_after_execute, headless):
     if output is None:
-        output = f"output-{isotime()}"
+        output = f"output-{iso_to_path(isotime())}"
 
     if os.path.exists(output):
         print(f"Output directory {output} already exists")
@@ -124,12 +135,9 @@ def _monitor_output(url, output, wait_after_execute, headless):
 
             timestamp = isotime()
 
-            # Colons are invalid in filenames on Windows
-            filename_timestamp = timestamp.replace(":", "-")
-
             screenshot_filename = os.path.join(
                 output,
-                f"input-{input_index:03d}-{filename_timestamp}.png",
+                f"input-{input_index:03d}-{iso_to_path(timestamp)}.png",
             )
             image = Image.open(BytesIO(screenshot_bytes))
             image.save(screenshot_filename)
@@ -192,12 +200,9 @@ def _monitor_output(url, output, wait_after_execute, headless):
 
                         timestamp = isotime()
 
-                        # Colons are invalid in filenames on Windows
-                        filename_timestamp = timestamp.replace(":", "-")
-
                         screenshot_filename = os.path.join(
                             output,
-                            f"output-{output_index:03d}-{filename_timestamp}.png",
+                            f"output-{output_index:03d}-{iso_to_path(timestamp)}.png",
                         )
                         image = Image.open(BytesIO(screenshot_bytes))
                         image.save(screenshot_filename)

diff --git a/jupyter_output_monitor/_report.py b/jupyter_output_monitor/_report.py
@@ -0,0 +1,116 @@
+import csv
+import datetime
+import os
+
+import click
+import nbformat
+
+__all__ = ["report", "report_group"]
+
+
+@click.group()
+def report_group():
+    pass
+
+
+@report_group.command()
+@click.option(
+    "--notebook",
+    default=None,
+    help="The notebook that was profiled.",
+)
+@click.option(
+    "--results-dir",
+    default=None,
+    help="Output results directory from the profiling",
+)
+@click.option(
+    "--output-report-name",
+    default="report.ipynb",
+    help="Write a copy of the notebook containing screenshots and profiling results to a notebook with the specified name, in the results directory",
+)
+def report(notebook, results_dir, output_report_name):
+    with open(os.path.join(results_dir, "event_log.csv")) as csvfile:
+        reader = csv.DictReader(csvfile)
+        log = list(reader)
+
+    # convert ISO times to elapsed times from first executed cell:
+    start_time = datetime.datetime.fromisoformat(log[0]["time"])
+    for row in log:
+        row["time"] = (
+            datetime.datetime.fromisoformat(row["time"]) - start_time
+        ).total_seconds()
+
+    results = {}
+    last_executed_cell = None
+
+    # group timing results by execution cell
+    for row in log:
+        index = row["index"]
+        event = row["event"]
+
+        if index not in results and event == "execute-input":
+            results[index] = {
+                "execute-input": None,
+                "output-changed": [],
+            }
+
+            results[index][event] = row
+            last_executed_cell = index
+
+        elif event == "output-changed":
+            row["output_from_cell"] = last_executed_cell
+            row["dt"] = (
+                row["time"] - results[last_executed_cell]["execute-input"]["time"]
+            )
+            results[last_executed_cell][event].append(row)
+
+    # compute "final" timing results per execution cell
+    for result in results.values():
+        has_outputs = len(result["output-changed"])
+        result["total"] = result["output-changed"][-1]["dt"] if has_outputs else None
+        result["n_updates"] = len(result["output-changed"]) if has_outputs else None
+
+    # assemble annotations in markdown format for each executed code cell:
+    markdown_annotations = []
+    for idx, result in results.items():
+        if len(result["output-changed"]):
+            screenshot_path = os.path.basename(
+                result["output-changed"][-1]["screenshot"],
+            )
+            markdown_annotations.append(
+                f"![output screenshot]({screenshot_path})\n\n"
+                f"#### Profiling result for cell {idx}: \n * {result['total']:.2f} seconds "
+                f"elapsed\n * {result['n_updates']:d} output updates\n",
+            )
+        else:
+            markdown_annotations.append(
+                f"#### Profiling result for cell {idx}: \nNo output.\n",
+            )
+
+    # read in the source notebook:
+    nb = nbformat.read(notebook, nbformat.NO_CONVERT)
+
+    # create new list of cells, weaving together the existing
+    # cells and the new markdown cells with profiling results
+    # and screenshots:
+    new_cells = []
+    nonempty_code_cell_idx = -1
+    for cell in nb["cells"]:
+        new_cells.append(cell)
+        if cell["cell_type"] == "code" and len(cell["source"]):
+            nonempty_code_cell_idx += 1
+            new_cells.append(
+                nbformat.v4.new_markdown_cell(
+                    markdown_annotations[nonempty_code_cell_idx],
+                ),
+            )
+
+    nb["cells"] = new_cells
+
+    output_notebook = os.path.join(results_dir, output_report_name)
+
+    print(f"Writing notebook with profiling results to: {output_notebook}")
+
+    new_notebook = nbformat.from_dict(nb)
+    nbformat.write(new_notebook, output_notebook)
diff --git a/jupyter_output_monitor/tests/test_monitor.py b/jupyter_output_monitor/tests/test_monitor.py
@@ -13,6 +13,7 @@ def test_simple(tmp_path):
             sys.executable,
             "-m",
             "jupyter_output_monitor",
+            "monitor",
             "--notebook",
             str(DATA / "simple.ipynb"),
             "--output",
@@ -40,3 +41,19 @@ def test_simple(tmp_path):
     with open(output_path / "event_log.csv") as f:
         reader = csv.reader(f, delimiter=",")
         assert len(list(reader)) == 10
+
+    subprocess.run(
+        [
+            sys.executable,
+            "-m",
+            "jupyter_output_monitor",
+            "report",
+            "--notebook",
+            str(DATA / "simple.ipynb"),
+            "--results-dir",
+            str(output_path),
+        ],
+        check=True,
+    )
+
+    assert (output_path / "report.ipynb").exists()
diff --git a/pyproject.toml b/pyproject.toml
@@ -12,7 +12,8 @@ dependencies = [
     "click",
     "pillow",
     "playwright",
-    "solara[pytest]"
+    "solara[pytest]",
+    "nbformat",
 ]
 dynamic = ["version"]
 
@@ -26,7 +27,7 @@ requires = ["setuptools",
 build-backend = 'setuptools.build_meta'
 
 [project.scripts]
-jupyter-output-monitor = "jupyter_output_monitor:monitor"
+jupyter-output-monitor = "jupyter_output_monitor:main"
 
 [tool.setuptools]
 zip-safe = false