Added infrastructure to run a local Jupyter Lab instance and also added pre-commit configuration
glue-viz · Nov 13, 2024 · 7aed9be · 7aed9be
1 parent 404fe60
commit 7aed9be
Show file tree

Hide file tree

Showing 8 changed files with 208 additions and 54 deletions.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -0,0 +1,15 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+
+version: 2
+updates:
+  - package-ecosystem: "github-actions" # See documentation for possible values
+    directory: ".github/workflows" # Location of package manifests
+    schedule:
+      interval: "weekly"
+    groups:
+      actions:
+        patterns:
+          - "*"
diff --git a/.gitignore b/.gitignore
@@ -4,3 +4,4 @@ __pycache__
 dist
 build
 .ipynb_checkpoints
+__pycache__
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,29 @@
+ci:
+  autofix_prs: false
+  autoupdate_schedule: 'monthly'
+
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: check-added-large-files
+        args: ["--enforce-all", "--maxkb=300"]
+      - id: check-case-conflict
+      - id: check-json
+      - id: check-merge-conflict
+      - id: check-symlinks
+      - id: check-toml
+      - id: check-xml
+      - id: check-yaml
+        exclude: ".*(.github.*)$"
+      - id: detect-private-key
+      - id: end-of-file-fixer
+        exclude: ".*(data.*|extern.*|licenses.*|_static.*|_parsetab.py)$"
+      - id: trailing-whitespace
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: "v0.3.4"
+    hooks:
+      - id: ruff
+        args: ["--fix", "--show-fixes"]
+      - id: ruff-format
diff --git a/jupyter_output_monitor/__init__.py b/jupyter_output_monitor/__init__.py
@@ -1,2 +1,4 @@
 from ._monitor import monitor
 from ._version import __version__
+
+__all__ = ["monitor", "__version__"]
diff --git a/jupyter_output_monitor/_monitor.py b/jupyter_output_monitor/_monitor.py
@@ -4,81 +4,114 @@
 
 import os
 import sys
+import tempfile
 import time
-import click
-import datetime
+from io import BytesIO
 
-import numpy as np
+import click
 from PIL import Image
 from playwright.sync_api import sync_playwright
-from io import BytesIO
 
+from ._server import jupyter_server
+from ._utils import clear_notebook, isotime
 
 RG_SPECIAL = (143, 56)
 
-def isotime():
-    return datetime.datetime.now().isoformat()
 
 @click.command()
-@click.argument('url')
-@click.option('--output', default=None, help='Output directory - if not specified, this defaults to output_<timestamp>')
-@click.option('--wait-after-execute', default=10, help='Time in s to wait after executing each cell')
-@click.option('--headless', is_flag=True, help='Whether to run in headless mode')
-def monitor(url, output, wait_after_execute, headless):
-
+@click.option(
+    "--notebook",
+    default=None,
+    help="The notebook to profile. If specified a local Jupyter Lab instance will be run",
+)
+@click.option(
+    "--url",
+    default=None,
+    help="The URL hosting the notebook to profile, including any token and notebook path.",
+)
+@click.option(
+    "--output",
+    default=None,
+    help="Output directory - if not specified, this defaults to output-<timestamp>",
+)
+@click.option(
+    "--wait-after-execute",
+    default=10,
+    help="Time in s to wait after executing each cell",
+)
+@click.option("--headless", is_flag=True, help="Whether to run in headless mode")
+def monitor(notebook, url, output, wait_after_execute, headless):
+    """Execute each notebook cell in a browser and record changes in the outputs.
+
+    Exactly one of --notebook or --url must be given. With --notebook, a copy
+    of the notebook with outputs and metadata cleared is served from a
+    temporary local Jupyter Lab instance; with --url, the page at that URL is
+    used directly. Screenshots and an event log are written to the output
+    directory.
+    """
+    # Validate the arguments before touching the filesystem, so that an
+    # invalid invocation does not leave an empty output directory behind
+    if notebook is None and url is None:
+        print("Either --notebook or --url should be specified")
+        sys.exit(1)
+    if notebook is not None and url is not None:
+        print("Only one of --notebook or --url should be specified")
+        sys.exit(1)
+
     if output is None:
-        output = f'output-{isotime()}'
+        output = f"output-{isotime()}"
 
     if os.path.exists(output):
-        print('Output directory {output} already exists')
+        print(f"Output directory {output} already exists")
         sys.exit(1)
 
     os.makedirs(output)
 
+    if notebook is None:
+        _monitor_output(url, output, wait_after_execute, headless)
+        return
+
+    # Create a temporary directory with a clean version of the notebook; the
+    # directory is removed again once the server has been stopped
+    with tempfile.TemporaryDirectory() as notebook_dir:
+        clear_notebook(notebook, os.path.join(notebook_dir, "notebook.ipynb"))
+        with jupyter_server(notebook_dir) as server:
+            url = server.base_url + "/lab/tree/notebook.ipynb"
+            _monitor_output(url, output, wait_after_execute, headless)
+
+
+def _monitor_output(url, output, wait_after_execute, headless):
     # Index of the current last screenshot, by output index
     last_screenshot = {}
 
-    with sync_playwright() as p, open(os.path.join(output, 'event_log.csv'), 'w') as log:
-
-        log.write('time,event,index,screenshot\n')
+    with (
+        sync_playwright() as p,
+        open(os.path.join(output, "event_log.csv"), "w") as log,
+    ):
+        log.write("time,event,index,screenshot\n")
         log.flush()
 
         # Launch browser and open URL
 
         browser = p.firefox.launch(headless=headless)
-        page = browser.new_page(viewport={'width':2000, 'height':10000})
+        page = browser.new_page(viewport={"width": 2000, "height": 10000})
         page.goto(url)
 
         while True:
-
-            print('Checking for input cells')
+            print("Checking for input cells")
 
             # Construct list of input and output cells in the notebook
-            input_cells = list(page.query_selector_all('.jp-InputArea-editor'))
+            input_cells = list(page.query_selector_all(".jp-InputArea-editor"))
 
             # Keep only input cells that are visible
             input_cells = [cell for cell in input_cells if cell.is_visible()]
 
             if len(input_cells) > 0:
                 break
 
-            print('-> No input cells found, waiting before checking again')
+            print("-> No input cells found, waiting before checking again")
 
             # If no visible input cells, wait and try again
             page.wait_for_timeout(1000)
 
-        print(f'{len(input_cells)} input cells found')
+        print(f"{len(input_cells)} input cells found")
 
         last_screenshot = {}
 
         # Now loop over each input cell and execute
         for input_index, input_cell in enumerate(input_cells):
-
-            if input_cell.text_content().strip() == '':
-                print(f'Skipping empty input cell {input_index}')
+            if input_cell.text_content().strip() == "":
+                print(f"Skipping empty input cell {input_index}")
                 continue
 
-            print(f'Execute input cell {input_index}')
+            print(f"Execute input cell {input_index}")
 
             # Take screenshot before we start executing cell but save it after
             screenshot_bytes = input_cell.screenshot()
@@ -87,48 +120,51 @@ def monitor(url, output, wait_after_execute, headless):
             input_cell.click()
 
             # Execute it
-            page.keyboard.press('Shift+Enter')
+            page.keyboard.press("Shift+Enter")
 
             timestamp = isotime()
 
-            screenshot_filename = os.path.join(output, f'input-{input_index:03d}-{timestamp}.png')
+            screenshot_filename = os.path.join(
+                output,
+                f"input-{input_index:03d}-{timestamp}.png",
+            )
             image = Image.open(BytesIO(screenshot_bytes))
             image.save(screenshot_filename)
 
-            log.write(f'{timestamp},execute-input,{input_index},{screenshot_filename}\n')
+            log.write(
+                f"{timestamp},execute-input,{input_index},{screenshot_filename}\n",
+            )
 
             # Now loop and check for changes in any of the output cells - if a cell
             # output changes, save a screenshot
 
-            print('Watching for changes in output cells')
+            print("Watching for changes in output cells")
 
             start = time.time()
             while time.time() - start < wait_after_execute:
-
-                output_cells = list(page.query_selector_all('.jp-OutputArea-output'))
+                output_cells = list(page.query_selector_all(".jp-OutputArea-output"))
 
                 for output_cell in output_cells:
-
                     if not output_cell.is_visible():
                         continue
 
                     # The element we are interested in is one level down
 
-                    div = output_cell.query_selector('div')
+                    div = output_cell.query_selector("div")
 
                     if div is None:
                         continue
 
-                    style = div.get_attribute('style')
+                    style = div.get_attribute("style")
 
-                    if style is None or 'border-color: rgb(' not in style:
+                    if style is None or "border-color: rgb(" not in style:
                         continue
 
                     # Parse rgb values for border
-                    start_pos = style.index('border-color:')
-                    start_pos = style.index('(', start_pos) + 1
-                    end_pos = style.index(')', start_pos)
-                    r, g, b = [int(x) for x in style[start_pos:end_pos].split(',')]
+                    start_pos = style.index("border-color:")
+                    start_pos = style.index("(", start_pos) + 1
+                    end_pos = style.index(")", start_pos)
+                    r, g, b = (int(x) for x in style[start_pos:end_pos].split(","))
 
                     # The (r,g) pair is chosen to be random and unlikely to
                     # happen by chance on the page. If this values don't match, we
@@ -142,30 +178,39 @@ def monitor(url, output, wait_after_execute, headless):
                     # which should be sufficient
                     output_index = b
 
-                    print(f'- taking screenshot of output cell {output_index}')
+                    print(f"- taking screenshot of output cell {output_index}")
 
                     screenshot_bytes = div.screenshot()
 
                     # If screenshot didn't exist before for this cell or if it has
                     # changed, we save it to a file and keep track of it.
-                    if output_index not in last_screenshot or last_screenshot[output_index] != screenshot_bytes:
-
-                        print(f' -> change detected!')
+                    if (
+                        output_index not in last_screenshot
+                        or last_screenshot[output_index] != screenshot_bytes
+                    ):
+                        print(" -> change detected!")
 
                         timestamp = isotime()
-                        screenshot_filename = os.path.join(output, f'output-{output_index:03d}-{timestamp}.png')
+                        screenshot_filename = os.path.join(
+                            output,
+                            f"output-{output_index:03d}-{timestamp}.png",
+                        )
                         image = Image.open(BytesIO(screenshot_bytes))
                         image.save(screenshot_filename)
 
-                        log.write(f'{timestamp},output-changed,{output_index},{screenshot_filename}\n')
+                        log.write(
+                            f"{timestamp},output-changed,{output_index},{screenshot_filename}\n",
+                        )
                         log.flush()
 
-                        print(f"Saving screenshot of output {output_index} at {timestamp}")
+                        print(
+                            f"Saving screenshot of output {output_index} at {timestamp}",
+                        )
 
                         last_screenshot[output_index] = screenshot_bytes
 
-            print('Stopping monitoring output and moving on to next input cell')
+            print("Stopping monitoring output and moving on to next input cell")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     monitor()
diff --git a/jupyter_output_monitor/_server.py b/jupyter_output_monitor/_server.py
@@ -0,0 +1,20 @@
+from contextlib import contextmanager
+
+from solara.test.pytest_plugin import (
+    ServerJupyter,
+)
+
+from ._utils import get_free_port
+
+__all__ = ["jupyter_server"]
+
+
+@contextmanager
+def jupyter_server(notebook_path):
+    """Run a threaded ``ServerJupyter`` for the duration of a ``with`` block.
+
+    ``notebook_path`` is passed through unchanged as the first argument of
+    ``ServerJupyter``. The server is bound to ``localhost`` on a port obtained
+    from ``get_free_port``, is yielded once ``wait_until_serving`` has
+    returned, and is always asked to stop when the block is left.
+    """
+    port = get_free_port()
+    server = ServerJupyter(notebook_path, port, "localhost")
+    try:
+        # Start serving in a background thread, then block until it is ready
+        server.serve_threaded()
+        server.wait_until_serving()
+        yield server
+    finally:
+        server.stop_serving()
diff --git a/jupyter_output_monitor/_utils.py b/jupyter_output_monitor/_utils.py
@@ -0,0 +1,33 @@
+import datetime
+import socket
+
+from nbconvert import NotebookExporter
+from traitlets.config import Config
+
+__all__ = ["get_free_port", "clear_notebook", "isotime"]
+
+
+def get_free_port():
+    """Return a free port number.
+
+    The port is found by binding a temporary socket to port 0, which lets the
+    operating system pick an unused port. The socket is closed before
+    returning, so another process could in principle claim the port before
+    the caller binds to it.
+    """
+    # Use the socket as a context manager so that it is closed explicitly
+    # rather than being left for the garbage collector
+    with socket.socket() as sock:
+        sock.bind(("", 0))
+        return sock.getsockname()[1]
+
+
+def clear_notebook(input_notebook, output_notebook):
+    """Write out a copy of the notebook with output and metadata removed.
+
+    Parameters
+    ----------
+    input_notebook
+        Path of the notebook file to read.
+    output_notebook
+        Path of the file the cleaned notebook is written to.
+    """
+    c = Config()
+    c.NotebookExporter.preprocessors = [
+        "nbconvert.preprocessors.ClearOutputPreprocessor",
+        "nbconvert.preprocessors.ClearMetadataPreprocessor",
+    ]
+
+    exporter = NotebookExporter(config=c)
+    # The exporter also returns a resources object, which is not needed here
+    body, _ = exporter.from_filename(input_notebook)
+
+    # Notebook files are UTF-8 encoded JSON, so do not rely on the platform
+    # default encoding when writing the result
+    with open(output_notebook, "w", encoding="utf-8") as f:
+        f.write(body)
+
+
+def isotime():
+    """Return the current local time as an ISO 8601 formatted string."""
+    now = datetime.datetime.now()
+    return now.isoformat()
diff --git a/pyproject.toml b/pyproject.toml
@@ -38,8 +38,17 @@ find = {namespaces = false}
 write_to = "jupyter_output_monitor/_version.py"
 
 [tool.ruff]
-lint.select = [
-  "B",  # flake8-bugbear
-  "I",  # isort
-  "UP",  # pyupgrade
+lint.select = ["ALL"]
+lint.ignore = [
+    "A00",
+    "ANN",
+    "T201",
+    "PTH",
+    "D100",
+    "D103",
+    "D104",
+    "C901",
+    "PLR0915",
+    "DTZ",
+    "E501"
 ]
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,3 +4,4 @@ __pycache__ @@
     dist
     build
     .ipynb_checkpoints
+    __pycache__