Skip to content

Commit

Permalink
server: option in shot-scraper multi, refs #156
Browse files Browse the repository at this point in the history
  • Loading branch information
simonw committed Sep 26, 2024
1 parent a559ea0 commit bcab22e
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 24 deletions.
13 changes: 13 additions & 0 deletions docs/multi.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,19 @@ You can include desired `height`, `width`, `quality`, `wait` and `wait_for` opti
wait_for: document.querySelector('#bighead')
```

## Running a server for the duration of the session

If you need to run a server for the duration of the `shot-scraper multi` session you can specify that using a `server:` block, like this:
```yaml
- server: python -m http.server 8000
```
You can now take screenshots of `http://localhost:8000/` and any other URLs served by that server:
```yaml
- output: index.png
  url: http://localhost:8000/
```
The server process will be automatically terminated when the `shot-scraper multi` command completes.

## `shot-scraper multi --help`

Full `--help` for this command:
Expand Down
64 changes: 40 additions & 24 deletions shot_scraper/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from playwright.sync_api import sync_playwright, Error, TimeoutError
from runpy import run_module
import secrets
import subprocess
import sys
import textwrap
import time
Expand Down Expand Up @@ -513,6 +514,7 @@ def multi(
"""
scale_factor = normalize_scale_factor(retina, scale_factor)
shots = yaml.safe_load(config)
server_processes = []
if shots is None:
shots = []
if not isinstance(shots, list):
Expand All @@ -530,31 +532,43 @@ def multi(
auth_username=auth_username,
auth_password=auth_password,
)
for shot in shots:
if (
noclobber
and shot.get("output")
and pathlib.Path(shot["output"]).exists()
):
continue
if outputs and shot.get("output") not in outputs:
continue
try:
take_shot(
context,
shot,
log_console=log_console,
skip=skip,
fail=fail,
silent=silent,
)
except TimeoutError as e:
if fail or fail_on_error:
raise click.ClickException(str(e))
else:
click.echo(str(e), err=True)
try:
for shot in shots:
if (
noclobber
and shot.get("output")
and pathlib.Path(shot["output"]).exists()
):
continue
browser_obj.close()
if outputs and shot.get("output") not in outputs:
continue
if "server" in shot:
# Start that subprocess and remember the pid
server_processes.append(
subprocess.Popen(shot["server"], shell=True)
)
time.sleep(1)
if "url" in shot:
try:
take_shot(
context,
shot,
log_console=log_console,
skip=skip,
fail=fail,
silent=silent,
)
except TimeoutError as e:
if fail or fail_on_error:
raise click.ClickException(str(e))
else:
click.echo(str(e), err=True)
continue
finally:
browser_obj.close()
if server_processes:
for process in server_processes:
process.kill()


@cli.command()
Expand Down Expand Up @@ -1053,6 +1067,7 @@ def _check_and_absolutize(filepath):
# On Windows, instantiating a Path object on `http://` or `https://` will raise an exception
return False


def _get_viewport(width, height):
if width or height:
return {
Expand All @@ -1062,6 +1077,7 @@ def _get_viewport(width, height):
else:
return {}


def take_shot(
context_or_page,
shot,
Expand Down
4 changes: 4 additions & 0 deletions tests/run_examples.sh
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ shot-scraper examples/div-after-2-seconds.html \
echo '# empty file' > empty.yml
shot-scraper multi empty.yml
(cd examples && echo '
- server: python -m http.server 9043
- output: example.com.png
url: http://www.example.com/
# This one will produce github-com.png
Expand Down Expand Up @@ -169,6 +170,9 @@ shot-scraper multi empty.yml
width: 300
height: 200
wait: 2100
# Screenshot from the server
- url: http://localhost:9043/
output: from-server.png
' | shot-scraper multi - --fail)
# --bypass-csp
shot-scraper javascript github.com "async () => { await import('https://cdn.jsdelivr.net/npm/left-pad/+esm'); return 'content-security-policy ignored' }" -o examples/github-csp.json --bypass-csp
Expand Down
16 changes: 16 additions & 0 deletions tests/test_shot_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,22 @@ def test_version():
assert result.output.startswith("cli, version ")


# Template for a `shot-scraper multi` config: the first item starts a local
# HTTP server, the second takes a screenshot of it.  The `{}` placeholder is
# filled with the output path via str.format().  `output:` must be indented
# under its `- url:` list item so both keys belong to the same shot mapping.
SERVER_YAML = """
- server: python -m http.server 9023
- url: http://localhost:9023/
  output: {}
""".strip()


def test_multi_server(tmpdir):
    """Run `multi` on a config with a `server:` entry and check the shot.

    The config is rendered from SERVER_YAML with the screenshot path
    substituted in, written to a temporary file, and passed to the `multi`
    command.  The test passes when the command exits with status 0 and the
    screenshot file exists afterwards.
    """
    screenshot_path = tmpdir / "output.png"
    config_path = tmpdir / "server.yaml"
    config_path.write(SERVER_YAML.format(screenshot_path))

    result = CliRunner().invoke(cli, ["multi", str(config_path)])

    # Include the captured CLI output in the failure message for debugging.
    assert result.exit_code == 0, result.output
    assert screenshot_path.exists()


@pytest.mark.parametrize("input", ("key: value", "This is a string", "3.55"))
def test_multi_error_on_non_list(input):
runner = CliRunner()
Expand Down

0 comments on commit bcab22e

Please sign in to comment.