diff --git a/.gitignore b/.gitignore index da5e94a6c326..c57011ab34c7 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,6 @@ cache-* # python tmp files __pycache__ + +scripts/offlinepi/mitmproxy-ca-cert.pem +scripts/offlinepi/responses.har diff --git a/Cargo.lock b/Cargo.lock index 8f922b04a910..ea28816435cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2979,6 +2979,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rustls", + "rustls-native-certs", "rustls-pemfile", "serde", "serde_json", @@ -2993,7 +2994,6 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots", "winreg", ] @@ -3111,6 +3111,18 @@ dependencies = [ "sct", ] +[[package]] +name = "rustls-native-certs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe", + "rustls-pemfile", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pemfile" version = "1.0.3" @@ -3151,6 +3163,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "schannel" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88" +dependencies = [ + "windows-sys 0.48.0", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -3193,6 +3214,29 @@ version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" +[[package]] +name = "security-framework" +version = "2.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05b64fb303737d99b81884b2c63433e9ae28abebe5eb5045dcdd175dc2ecf4de" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.9.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e932934257d3b408ed8f30db49d85ea163bfe74961f017f405b025af298f0c7a" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "serde" version = "1.0.191" @@ -4165,12 +4209,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "webpki-roots" -version = "0.25.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc" - [[package]] name = "which" version = "5.0.0" diff --git a/Cargo.toml b/Cargo.toml index 68b062e1b1d3..d1f87b2ecf3e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,7 +60,7 @@ rayon = { version = "1.8.0" } # For correct IO error handling: https://github.com/cargo-bins/reflink-copy/pull/51 reflink-copy = { git = "https://github.com/cargo-bins/reflink-copy", rev = "7dffdccc4d4152cdc0a460b3ba8e77dd84ad74df" } regex = { version = "1.10.2" } -reqwest = { version = "0.11.22", default-features = false, features = ["json", "gzip", "brotli", "stream", "rustls-tls"] } +reqwest = { version = "0.11.22", default-features = false, features = ["json", "gzip", "brotli", "stream", "rustls-tls-native-roots"] } reqwest-middleware = { version = "0.2.4" } reqwest-retry = { version = "0.3.0" } rfc2047-decoder = { version = "1.0.1" } diff --git a/crates/puffin-client/Cargo.toml b/crates/puffin-client/Cargo.toml index c8ffe7212ea0..78089ebc8ea2 100644 --- a/crates/puffin-client/Cargo.toml +++ b/crates/puffin-client/Cargo.toml @@ -35,3 +35,6 @@ url = { workspace = true } [dev-dependencies] anyhow = { workspace = true } tokio = { workspace = true, features = ["fs", "macros"] } + +[features] +puffin-test-custom-ca-cert = [] diff --git a/crates/puffin-client/src/registry_client.rs b/crates/puffin-client/src/registry_client.rs index 348df4057738..e57240cfbb09 100644 --- a/crates/puffin-client/src/registry_client.rs +++ b/crates/puffin-client/src/registry_client.rs @@ -74,11 +74,22 @@ impl 
RegistryClientBuilder { pub fn build(self) -> RegistryClient { let client_raw = { - let client_core = ClientBuilder::new() + let mut client_core = ClientBuilder::new() .user_agent("puffin") .pool_max_idle_per_host(20) .timeout(std::time::Duration::from_secs(60 * 5)); + if cfg!(feature = "puffin-test-custom-ca-cert") { + if let Some(cert) = std::env::var_os("PUFFIN_TEST_CA_CERT_PEM") { + client_core = client_core.add_root_certificate( + reqwest::Certificate::from_pem( + &fs_err::read(cert).expect("No PUFFIN_TEST_CA_CERT_PEM"), + ) + .expect("Invalid certificate"), + ) + } + } + client_core.build().expect("Fail to build HTTP client.") }; diff --git a/scripts/offlinepi/README.md b/scripts/offlinepi/README.md new file mode 100644 index 000000000000..958b34f932ed --- /dev/null +++ b/scripts/offlinepi/README.md @@ -0,0 +1,58 @@ +# offlinepi + +Utilities for managing an offline version of PyPI. + +## Installation + +Installation requires `mitmproxy`. Unreleased changes are required, so installing from GitHub is recommended: + +``` +pip install git+https://github.com/mitmproxy/mitmproxy@1fcd0335d59c301d73d1b1ef676ecafcf520ab79 +``` + +## Usage + +Record PyPI responses during a command: + +``` +./offlinepi record <command> +``` + +Replay PyPI responses during a command: + +``` +./offlinepi replay <command> +``` + +### Example + +Record server interactions during Puffin's tests: + +``` +./offlinepi record cargo test --features pypi -- --test-threads=1 +``` + +**Note**: Recording tests without parallelism is helpful for reliable replays. + +Then, run it again using replayed responses: + +``` +./offlinepi replay cargo test --features pypi +``` + +## TLS Certificates + +In order to record HTTPS requests, the certificate generated by mitmproxy must be installed. +See [the mitmproxy certificate documentation](https://docs.mitmproxy.org/stable/concepts-certificates/) for details. + +## Implementation + +[mitmproxy](https://mitmproxy.org/) is used to record and replay responses.
+ +The proxy is temporarily created for the execution of the provided command. + +The command _must_ respect the `HTTP_PROXY` and `HTTPS_PROXY` environment variables. + +Response recording is limited to `pypi.org` and `files.pythonhosted.org`. + +Responses are written to `responses.har` in the `offlinepi` project root. diff --git a/scripts/offlinepi/offlinepi b/scripts/offlinepi/offlinepi new file mode 100755 index 000000000000..200af9943247 --- /dev/null +++ b/scripts/offlinepi/offlinepi @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +# +# Run a command, recording or replaying interaction with the PyPI server. +# Exits with the status of the provided command. +# +# Usage: +# +# offlinepi <mode> <command> +# + +projectroot=$(realpath "$(dirname "$0")") +responsefile="$projectroot/responses.har" + +mode=$1 +shift + +if [ -z "$mode" ]; then + echo 'A mode must be provided e.g. `offlinepi record ...`' + exit 1 +fi + +if [[ "${mode}" != @(record|replay) ]]; then + echo "Invalid mode \"$mode\"; expected either \"record\" or \"replay\"." + exit 1 +fi + +if "$projectroot/offlinepi-healthcheck"; then + echo "Proxy is already running at localhost:8080" + echo "Aborted!" + exit 1 +fi + +echo "Starting proxy server to $mode responses..." +"$projectroot/offlinepi-$mode" "$responsefile" & +PROXY_PID=$! + +if ! "$projectroot/offlinepi-wait" "$PROXY_PID"; then + echo "Server failed to start!" + echo "Aborted!" + "$projectroot/offlinepi-stop" "$PROXY_PID" + exit 1 +fi + +export HTTP_PROXY=http://localhost:8080 +export HTTPS_PROXY=https://localhost:8080 + +echo "Running provided command..." +# Keep the command's status so a failure is not masked by the cleanup below. +"$@" +status=$? + +echo "Stopping proxy server..." +"$projectroot/offlinepi-stop" "$PROXY_PID" +# The proxy is a child of this shell, so reap it here before exiting. +wait "$PROXY_PID" 2> /dev/null + +exit "$status" diff --git a/scripts/offlinepi/offlinepi-healthcheck b/scripts/offlinepi/offlinepi-healthcheck new file mode 100755 index 000000000000..ec21260ae877 --- /dev/null +++ b/scripts/offlinepi/offlinepi-healthcheck @@ -0,0 +1,12 @@ +#!/usr/bin/env sh +# +# Checks if the proxy is running.
+# +# Usage: +# +# offlinepi-healthcheck + +exec curl --output /dev/null --silent --head --fail --proxy 127.0.0.1:8080 http://mitm.it + +# TODO(zanieb): We could consider looking at the response to determine if a _different_ proxy is being used. +# TODO(zanieb): This could take a configurable host and port diff --git a/scripts/offlinepi/offlinepi-record b/scripts/offlinepi/offlinepi-record new file mode 100755 index 000000000000..7e48332a3a74 --- /dev/null +++ b/scripts/offlinepi/offlinepi-record @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# +# Start a proxy that records client-server interactions to a file. +# +# Usage: +# +# offlinepi-record <path> + +path=$1 +shift + +if [ -z "$path" ]; then + echo 'A recording path must be provided.' + exit 1 +fi + +if [ -n "$*" ]; then + echo "Unexpected extra arguments: $*" + exit 1 +fi + +# N.B. Additional options must be added _before_ the filter string +exec mitmdump \ + --set stream_large_bodies=1000m \ + --set hardump="$path" \ + "~d pypi.org|files.pythonhosted.org|mitm.it" + +# stream_large_bodies: must be set to a large value or large responses will not be recorded +# resulting in unexpected file endings during replays +# hardump: we use a HAR file instead of the binary format (-w <path>) so the output is +# human readable +# ~d: only interactions with package index domains should be recorded +# we also allow `mitm.it` so healthchecks succeed when replaying + +# Helpful notes for development +# --flow-detail <0-4> can be used to adjust the amount of information displayed about traffic diff --git a/scripts/offlinepi/offlinepi-replay b/scripts/offlinepi/offlinepi-replay new file mode 100755 index 000000000000..198e5e2cbc54 --- /dev/null +++ b/scripts/offlinepi/offlinepi-replay @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# +# Start a proxy that replays server responses from a recording. +# Unknown responses will result in a 500. +# Each response can only be replayed once or it will be treated as unknown.
+# +# Usage: +# +# offlinepi-replay <path> + +path=$1 +shift + +if [ -z "$path" ]; then + echo 'A recording path must be provided.' + exit 1 +fi + +if [ -n "$*" ]; then + echo "Unexpected extra arguments: $*" + exit 1 +fi + +exec mitmdump --server-replay "$path" \ + --flow-detail 3 \ + --server-replay-extra 500 \ + --set connection_strategy=lazy + +# server-replay-extra: configures behavior when a response is unknown. +# connection_strategy: lazy is required to replay offline diff --git a/scripts/offlinepi/offlinepi-stop b/scripts/offlinepi/offlinepi-stop new file mode 100755 index 000000000000..583903ab1eb2 --- /dev/null +++ b/scripts/offlinepi/offlinepi-stop @@ -0,0 +1,24 @@ +#!/usr/bin/env sh +# +# Stops the proxy at the given PID. +# +# Usage: +# +# offlinepi-stop <pid> + +pid=$1 +shift + +if [ -z "$pid" ]; then + echo 'A PID must be provided.' + exit 1 +fi + +if [ -n "$*" ]; then + echo "Unexpected extra arguments: $*" + exit 1 +fi + +kill "$pid" 2> /dev/null +wait "$pid" 2> /dev/null +echo "Done!" diff --git a/scripts/offlinepi/offlinepi-wait b/scripts/offlinepi/offlinepi-wait new file mode 100755 index 000000000000..5cfb87c91577 --- /dev/null +++ b/scripts/offlinepi/offlinepi-wait @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# +# Waits for the proxy to be ready. +# +# Usage: +# +# offlinepi-wait <pid> + +projectroot=$(realpath "$(dirname "$0")") +healthcheck="$projectroot/offlinepi-healthcheck" + +pid=$1 +shift + +if [ -z "$pid" ]; then + echo 'A PID must be provided.' + exit 1 +fi + +if [ -n "$*" ]; then + echo "Unexpected extra arguments: $*" + exit 1 +fi + + +# Wait until the server is ready +until $healthcheck; do + if ! kill -0 "$pid" 2> /dev/null; then + exit 1 + fi + sleep 1 +done