From 99d1d6973306c34a63bb19cc2c409870a5694ce6 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 26 Sep 2024 16:28:13 -0700 Subject: [PATCH] sh: and python: YAML commands, refs #155 --- docs/multi.md | 29 +++++++++++++++++++++++++++++ shot_scraper/cli.py | 16 ++++++++++++++-- tests/test_shot_scraper.py | 37 ++++++++++++++++++++++++++++++------- 3 files changed, 73 insertions(+), 9 deletions(-) diff --git a/docs/multi.md b/docs/multi.md index 025acb4..6e365f7 100644 --- a/docs/multi.md +++ b/docs/multi.md @@ -122,6 +122,35 @@ You can now take screenshots of `http://localhost:8000/` and any other URLs that ``` The server process will be automatically terminated when the `shot-scraper multi` command completes. +## Running custom code between steps + +If you are taking screenshots of a single application, you may find it useful to run additional steps between shots that modify that application in some way. + +You can do that using the `sh:` or `python:` keys. These can specify shell commands or Python code to run before taking the screenshot: + +```yaml +- sh: echo "Hello from shell" > index.html + output: from-shell.png + url: http://localhost:8000/ +``` +You can also specify a list of shell arguments like this: +```yaml +- sh: + - curl + - -o + - index.html + - https://www.example.com/ + output: example.png + url: http://localhost:8000/ +``` +If you specify these steps without a `url:` key they will still run as standalone steps, without taking a screenshot: +```yaml +- sh: echo "hello world" > index.html +- python: | + content = open("index.html").read() + open("index.html", "w").write(content.upper()) +``` + ## `shot-scraper multi --help` Full `--help` for this command: diff --git a/shot_scraper/cli.py b/shot_scraper/cli.py index 2ba60a6..103851d 100644 --- a/shot_scraper/cli.py +++ b/shot_scraper/cli.py @@ -540,8 +540,20 @@ def multi( and pathlib.Path(shot["output"]).exists() ): continue - if outputs and shot.get("output") not in outputs: 
+ if outputs and shot.get("output") and shot.get("output") not in outputs: continue + # Run "sh" key + if shot.get("sh"): + sh = shot["sh"] + if isinstance(sh, str): + subprocess.run(shot["sh"], shell=True) + elif isinstance(sh, list): + subprocess.run(sh) + else: + raise click.ClickException("- sh: must be a string or list") + # And "python" key + if shot.get("python"): + subprocess.run([sys.executable, "-c", shot["python"]]) if "server" in shot: # Start that subprocess and remember the pid server_processes.append( @@ -1098,7 +1110,7 @@ def take_shot( url = url_or_file_path(url, file_exists=_check_and_absolutize) - output = shot.get("output", "").strip() + output = (shot.get("output") or "").strip() if not output and not return_bytes: output = filename_for_url(url, ext="png", file_exists=os.path.exists) quality = shot.get("quality") diff --git a/tests/test_shot_scraper.py b/tests/test_shot_scraper.py index ea07e9a..9c67f61 100644 --- a/tests/test_shot_scraper.py +++ b/tests/test_shot_scraper.py @@ -1,4 +1,5 @@ from click.testing import CliRunner +import pathlib import pytest import textwrap from shot_scraper.cli import cli @@ -15,17 +16,39 @@ def test_version(): SERVER_YAML = """ - server: python -m http.server 9023 - url: http://localhost:9023/ - output: {} + output: output.png """.strip() +COMMANDS_YAML = """ +- sh: echo "hello world" > index.html +- sh: + - touch + - touched.html +- python: | + content = open("index.html").read() + open("index.html", "w").write(content.upper()) +""" + -def test_multi_server(tmpdir): - yaml_file = tmpdir / "server.yaml" - yaml_file.write(SERVER_YAML.format(tmpdir / "output.png")) +def test_multi_server(): runner = CliRunner() - result = runner.invoke(cli, ["multi", str(yaml_file)]) - assert result.exit_code == 0, result.output - assert (tmpdir / "output.png").exists() + with runner.isolated_filesystem(): + open("server.yaml", "w").write(SERVER_YAML) + result = runner.invoke(cli, ["multi", "server.yaml"]) + assert 
result.exit_code == 0, result.output + assert pathlib.Path("output.png").exists() + + +def test_multi_commands(): + runner = CliRunner() + with runner.isolated_filesystem(): + yaml_file = "commands.yaml" + open(yaml_file, "w").write(COMMANDS_YAML) + result = runner.invoke(cli, ["multi", yaml_file], catch_exceptions=False) + assert result.exit_code == 0, result.output + assert pathlib.Path("touched.html").exists() + assert pathlib.Path("index.html").exists() + assert open("index.html").read().strip() == "HELLO WORLD" @pytest.mark.parametrize("input", ("key: value", "This is a string", "3.55"))