Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Load Pyodide runtime through HTTP #2451

Merged
merged 9 commits into from
Aug 9, 2024
3 changes: 2 additions & 1 deletion src/workerd/api/pyodide/pyodide.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ jsg::Bundle::Reader getPyodideBundle(kj::StringPtr version);


// Python/Pyodide settings held by the server and filled in from command-line options.
struct PythonConfig {
// NOTE(review): this listing is a diff hunk without +/- markers; `diskCacheRoot` looks like the
// pre-rename field that the two fields below replace — confirm against the final header.
kj::Maybe<kj::Own<const kj::Directory>> diskCacheRoot;
// Directory used as a disk cache for Python packages (set by --pyodide-package-disk-cache-dir).
// kj::none when no cache directory was given.
kj::Maybe<kj::Own<const kj::Directory>> packageDiskCacheRoot;
// Directory used as a disk cache for Pyodide bundles (set by --pyodide-bundle-disk-cache-dir).
// kj::none when no cache directory was given.
kj::Maybe<kj::Own<const kj::Directory>> pyodideDiskCacheRoot;
// Set by --python-save-snapshot ("Save a dedicated snapshot to the disk cache").
bool createSnapshot;
// Set by --python-save-baseline-snapshot.
bool createBaselineSnapshot;
};
Expand Down
1 change: 1 addition & 0 deletions src/workerd/server/server.c++
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <workerd/util/use-perfetto-categories.h>
#include <workerd/api/worker-rpc.h>
#include "workerd-api.h"
#include "workerd/api/pyodide/pyodide.h"
#include "workerd/io/hibernation-manager.h"
#include <stdlib.h>

Expand Down
10 changes: 7 additions & 3 deletions src/workerd/server/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,11 @@ class Server final: private kj::TaskSet::ErrorHandler {
void enableControl(uint fd) {
controlOverride = kj::heap<kj::FdOutputStream>(fd);
}
void setPythonDiskCacheRoot(kj::Maybe<kj::Own<const kj::Directory>> &&dkr) {
pythonConfig.diskCacheRoot = kj::mv(dkr);
void setPackageDiskCacheRoot(kj::Maybe<kj::Own<const kj::Directory>> &&dkr) {
pythonConfig.packageDiskCacheRoot = kj::mv(dkr);
}
void setPyodideDiskCacheRoot(kj::Maybe<kj::Own<const kj::Directory>> &&dkr) {
pythonConfig.pyodideDiskCacheRoot = kj::mv(dkr);
}
void setPythonCreateSnapshot() {
pythonConfig.createSnapshot = true;
Expand Down Expand Up @@ -103,7 +106,8 @@ class Server final: private kj::TaskSet::ErrorHandler {
kj::EntropySource& entropySource;
kj::Function<void(kj::String)> reportConfigError;
PythonConfig pythonConfig = PythonConfig {
.diskCacheRoot = kj::none,
.packageDiskCacheRoot = kj::none,
.pyodideDiskCacheRoot = kj::none,
.createSnapshot = false,
.createBaselineSnapshot = false
};
Expand Down
28 changes: 18 additions & 10 deletions src/workerd/server/tests/python/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,59 +1,67 @@
load("//:build/wd_test.bzl", "wd_test")

load("@bazel_skylib//rules:copy_file.bzl", "copy_file")

copy_file(
name = "pyodide-dev.capnp.bin@rule",
src = "//src/pyodide:pyodide.capnp.bin",
garrettgu10 marked this conversation as resolved.
Show resolved Hide resolved
out = "pyodide-bundle-cache/pyodide-dev.capnp.bin"
)

wd_test(
src = "hello/hello.wd-test",
args = ["--experimental"],
args = ["--experimental", "--pyodide-bundle-disk-cache-dir", "$(location pyodide-dev.capnp.bin@rule)/.."],
garrettgu10 marked this conversation as resolved.
Show resolved Hide resolved
data = glob(
[
"hello/*",
],
exclude = ["**/*.wd-test"],
),
) + ["pyodide-dev.capnp.bin@rule"],
)

wd_test(
src = "env-param/env.wd-test",
args = ["--experimental"],
args = ["--experimental", "--pyodide-bundle-disk-cache-dir", "$(location pyodide-dev.capnp.bin@rule)/.."],
data = glob(
[
"env-param/*",
],
exclude = ["**/*.wd-test"],
),
) + ["pyodide-dev.capnp.bin@rule"],
)

wd_test(
src = "random/random.wd-test",
args = ["--experimental"],
args = ["--experimental", "--pyodide-bundle-disk-cache-dir", "$(location pyodide-dev.capnp.bin@rule)/.."],
data = glob(
[
"random/*",
],
exclude = ["**/*.wd-test"],
),
) + ["pyodide-dev.capnp.bin@rule"],
)

# Disabled because it tests the same thing as the import test defined in import_tests.bzl
# wd_test(
# src = "langchain/langchain.wd-test",
# args = ["--experimental", "--disk-cache-dir", "../all_pyodide_wheels"],
# args = ["--experimental", "--pyodide-package-disk-cache-dir", "../all_pyodide_wheels", "--pyodide-bundle-disk-cache-dir", "$(location pyodide-dev.capnp.bin@rule)/.."],
# data = glob(
# [
# "langchain/*",
# ],
# exclude = ["**/*.wd-test"],
# ) + ["@all_pyodide_wheels//:whls"],
# ) + ["@all_pyodide_wheels//:whls", "pyodide-dev.capnp.bin@rule"],
# )
garrettgu10 marked this conversation as resolved.
Show resolved Hide resolved

wd_test(
src = "subdirectory/subdirectory.wd-test",
args = ["--experimental"],
args = ["--experimental", "--pyodide-bundle-disk-cache-dir", "$(location pyodide-dev.capnp.bin@rule)/.."],
data = glob(
[
"subdirectory/**",
],
exclude = ["**/*.wd-test"],
),
) + ["pyodide-dev.capnp.bin@rule"],
)

load("//src/workerd/server/tests/python:import_tests.bzl", "gen_import_tests")
Expand Down
5 changes: 3 additions & 2 deletions src/workerd/server/tests/python/import_tests.bzl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
load("@bazel_skylib//rules:write_file.bzl", "write_file")

load("//:build/wd_test.bzl", "wd_test")

def generate_import_py_file(imports):
Expand Down Expand Up @@ -51,7 +52,7 @@ def gen_import_tests(to_test):

wd_test(
src = wd_test_fname,
args = ["--experimental", "--disk-cache-dir", "../all_pyodide_wheels"],
data = [worker_py_fname, "@all_pyodide_wheels//:whls"],
args = ["--experimental", "--pyodide-package-disk-cache-dir", "../all_pyodide_wheels", "--pyodide-bundle-disk-cache-dir", "$(location pyodide-dev.capnp.bin@rule)/.."],
data = [worker_py_fname, "@all_pyodide_wheels//:whls", "pyodide-dev.capnp.bin@rule"],
tags = ["slow"],
)
107 changes: 102 additions & 5 deletions src/workerd/server/workerd-api.c++
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
#include <openssl/sha.h>
#include <openssl/hmac.h>
#include <openssl/rand.h>
#include <kj/compat/http.h>
#include <kj/compat/tls.h>
#include <kj/compat/url.h>
#ifdef WORKERD_EXPERIMENTAL_ENABLE_WEBGPU
#include <workerd/api/gpu/gpu.h>
#else
Expand Down Expand Up @@ -116,7 +119,8 @@ JSG_DECLARE_ISOLATE_TYPE(JsgWorkerdIsolate,


static PythonConfig defaultConfig {
.diskCacheRoot = kj::none,
.packageDiskCacheRoot = kj::none,
.pyodideDiskCacheRoot = kj::none,
.createSnapshot = false,
.createBaselineSnapshot = false,
};
Expand Down Expand Up @@ -432,6 +436,89 @@ kj::Maybe<jsg::ModuleRegistry::ModuleInfo> WorkerdApi::tryCompileModule(
KJ_UNREACHABLE;
}

// Returns the file name under which the Pyodide bundle for `version` is stored in the bundle
// disk cache: "pyodide-<version>.capnp.bin".
kj::Path getPyodideBundleFileName(kj::StringPtr version) {
  auto bundleName = kj::str("pyodide-", version, ".capnp.bin");
  return kj::Path(kj::mv(bundleName));
}

// Looks up the cached Pyodide bundle for `version` inside `maybeDir`.
//
// Returns kj::none when no cache directory is configured or when the directory has no file
// for that version; otherwise returns the opened bundle file.
kj::Maybe<kj::Own<const kj::ReadableFile>> getPyodideBundleFile(const kj::Maybe<kj::Own<const kj::Directory>> &maybeDir, kj::StringPtr version) {
  KJ_IF_SOME(cacheDir, maybeDir) {
    return cacheDir->tryOpenFile(getPyodideBundleFileName(version));
  }

  // No cache directory configured.
  return kj::none;
}

// Stores `bytes` as the cached Pyodide bundle for `version` inside `maybeDir`.
//
// Does nothing when no cache directory is configured. The data is written through a file
// replacer and only committed once the whole payload has been written.
void writePyodideBundleFileToDisk(const kj::Maybe<kj::Own<const kj::Directory>> &maybeDir, kj::StringPtr version, kj::ArrayPtr<byte> bytes) {
  KJ_IF_SOME(cacheDir, maybeDir) {
    kj::Path bundlePath = getPyodideBundleFileName(version);
    auto pending = cacheDir->replaceFile(bundlePath, kj::WriteMode::CREATE | kj::WriteMode::MODIFY);

    pending->get().writeAll(bytes);
    pending->commit();
  }
}

// Makes the Pyodide bundle for `version` available in the Pyodide bundle table.
//
// Sources are tried in this order:
//   1. the bundle table itself (the version is already registered),
//   2. the bundle disk cache (`pyConfig.pyodideDiskCacheRoot`), when one is configured,
//   3. an HTTPS download, whose payload is also written back to the disk cache.
// The "dev" version is never downloaded.
//
// Returns true when the bundle is registered afterwards. Returns false when it could not be
// obtained: "dev" is missing from the disk cache, or the server answered with a non-200 status.
// A failed download is never cached or registered, so a later run can retry cleanly.
// NOTE(review): exceptions thrown on the download thread (network/TLS/disk errors) are expected
// to resurface when the thread is joined — confirm against kj::Thread.
bool fetchPyodideBundle(const api::pyodide::PythonConfig& pyConfig, kj::StringPtr version) {
  if (api::pyodide::hasPyodideBundle(version)) {
    KJ_LOG(WARNING, "Pyodide version ", version, " already exists in pyodide bundle table");
    return true;
  }

  auto maybePyodideBundleFile = getPyodideBundleFile(pyConfig.pyodideDiskCacheRoot, version);
  KJ_IF_SOME(pyodideBundleFile, maybePyodideBundleFile) {
    auto body = pyodideBundleFile->readAllBytes();
    api::pyodide::setPyodideBundleData(kj::str(version), kj::mv(body));

    return true;
  }

  if (version == "dev") {
    // The "dev" version is special: it is the tip-of-tree bundle built for testing, so it is
    // only ever looked up in the disk cache and never fetched from the internet.
    return false;
  }

  // Set by the download thread only after a 200 response has been cached and registered.
  bool downloaded = false;

  KJ_LOG(INFO, "Loading Pyodide package from internet...");
  {
    // The download runs on a dedicated thread that sets up its own async I/O context. The
    // thread object is scoped to this block, so it is joined before `downloaded` is read and
    // before the by-reference captures go out of scope.
    kj::Thread downloadThread([&]() {
      kj::AsyncIoContext io = kj::setupAsyncIo();
      kj::HttpHeaderTable table;

      kj::TlsContext::Options options;
      options.useSystemTrustStore = true;

      kj::Own<kj::TlsContext> tls = kj::heap<kj::TlsContext>(kj::mv(options));
      auto& network = io.provider->getNetwork();
      auto tlsNetwork = tls->wrapNetwork(network);
      auto& timer = io.provider->getTimer();

      auto client = kj::newHttpClient(timer, table, network, *tlsNetwork);

      kj::HttpHeaders headers(table);

      kj::String url = kj::str("https://pyodide.runtime-playground.workers.dev/python-runtime-capnp-bin/pyodide-", version, ".capnp.bin");

      auto req = client->request(kj::HttpMethod::GET, kj::StringPtr(url), headers);

      auto res = req.response.wait(io.waitScope);
      if (res.statusCode != 200) {
        // An error page must not be mistaken for a bundle: do not cache or register it.
        KJ_LOG(WARNING, "Failed to download Pyodide bundle", url, res.statusCode, res.statusText);
        return;
      }

      auto body = res.body->readAllBytes().wait(io.waitScope);

      // Write the cache entry first: registering the bundle below consumes `body`.
      writePyodideBundleFileToDisk(pyConfig.pyodideDiskCacheRoot, version, body);

      api::pyodide::setPyodideBundleData(kj::str(version), kj::mv(body));
      downloaded = true;
    });
  }

  if (!downloaded) {
    return false;
  }

  KJ_LOG(INFO, "Loaded Pyodide package from internet");

  return true;
}

void WorkerdApi::compileModules(
jsg::Lock& lockParam, config::Worker::Reader conf,
Worker::ValidationErrorReporter& errorReporter,
Expand All @@ -446,7 +533,17 @@ void WorkerdApi::compileModules(
if (hasPythonModules(confModules)) {
KJ_REQUIRE(featureFlags.getPythonWorkers(),
"The python_workers compatibility flag is required to use Python.");
// Inject pyodide bootstrap module.
// Inject Pyodide bundle
if(util::Autogate::isEnabled(util::AutogateKey::PYODIDE_LOAD_EXTERNAL)) {
if (fetchPyodideBundle(impl->pythonConfig, "dev"_kj)) {
modules->addBuiltinBundle(getPyodideBundle("dev"_kj), kj::none);
} else {
Comment on lines +542 to +544
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll want to select between "dev" and the appropriate version by setting an appropriate compat flag so that tests can be a bit more declarative. I think it would be confusing if you try to test a specific version but the test uses latest because it exists. But we can deal with that in a followup.

// TODO: hardcoded version number
KJ_REQUIRE(fetchPyodideBundle(impl->pythonConfig, "2"_kj), "Failed to get both dev and hardcoded Pyodide version");
modules->addBuiltinBundle(getPyodideBundle("2"_kj), kj::none);
}
}
// Inject pyodide bootstrap module (TODO: load this from the capnproto bundle?)
garrettgu10 marked this conversation as resolved.
Show resolved Hide resolved
{
auto mainModule = confModules.begin();
capnp::MallocMessageBuilder message;
Expand Down Expand Up @@ -512,7 +609,7 @@ void WorkerdApi::compileModules(
using ObjectModuleInfo = jsg::ModuleRegistry::ObjectModuleInfo;
using ResolveMethod = jsg::ModuleRegistry::ResolveMethod;
auto specifier = "pyodide-internal:disk_cache";
auto diskCache = jsg::alloc<DiskCache>(impl->pythonConfig.diskCacheRoot);
auto diskCache = jsg::alloc<DiskCache>(impl->pythonConfig.packageDiskCacheRoot);
modules->addBuiltinModule(
specifier,
[specifier = kj::str(specifier), diskCache = kj::mv(diskCache)](
Expand Down Expand Up @@ -935,9 +1032,9 @@ kj::Own<jsg::modules::ModuleRegistry> WorkerdApi::initializeBundleModuleRegistry
jsg::modules::Module::newJsgObjectModuleHandler<
DiskCache,
JsgWorkerdIsolate_TypeWrapper>(
[diskCache=jsg::alloc<DiskCache>(pythonConfig.diskCacheRoot)]
[packageDiskCache=jsg::alloc<DiskCache>(pythonConfig.packageDiskCacheRoot)]
(jsg::Lock& js) mutable -> jsg::Ref<DiskCache> {
return diskCache.addRef();
return packageDiskCache.addRef();
}));
// Inject a (disabled) SimplePythonLimiter
pyodideBundleBuilder.addSynthetic(limiterSpecifier,
Expand Down
14 changes: 11 additions & 3 deletions src/workerd/server/workerd.c++
Original file line number Diff line number Diff line change
Expand Up @@ -731,8 +731,10 @@ public:
.addOption({"experimental"}, [this]() { server->allowExperimental(); return true; },
"Permit the use of experimental features which may break backwards "
"compatibility in a future release.")
.addOptionWithArg({"disk-cache-dir"}, CLI_METHOD(setPythonDiskCacheDir), "<path>",
.addOptionWithArg({"pyodide-package-disk-cache-dir"}, CLI_METHOD(setPackageDiskCacheDir), "<path>",
"Use <path> as a disk cache to avoid repeatedly fetching packages from the internet. ")
.addOptionWithArg({"pyodide-bundle-disk-cache-dir"}, CLI_METHOD(setPyodideDiskCacheDir), "<path>",
"Use <path> as a disk cache to avoid repeatedly fetching Pyodide bundles from the internet. ")
.addOption({"python-save-snapshot"}, [this]() { server->setPythonCreateSnapshot(); return true; },
"Save a dedicated snapshot to the disk cache")
.addOption({"python-save-baseline-snapshot"}, [this]() { server->setPythonCreateBaselineSnapshot(); return true; },
Expand Down Expand Up @@ -937,10 +939,16 @@ public:
server->enableControl(fd);
}

void setPythonDiskCacheDir(kj::StringPtr pathStr) {
void setPackageDiskCacheDir(kj::StringPtr pathStr) {
kj::Path path = fs->getCurrentPath().eval(pathStr);
kj::Maybe<kj::Own<const kj::Directory>> dir = fs->getRoot().tryOpenSubdir(path, kj::WriteMode::MODIFY);
server->setPythonDiskCacheRoot(kj::mv(dir));
server->setPackageDiskCacheRoot(kj::mv(dir));
}

void setPyodideDiskCacheDir(kj::StringPtr pathStr) {
kj::Path path = fs->getCurrentPath().eval(pathStr);
kj::Maybe<kj::Own<const kj::Directory>> dir = fs->getRoot().tryOpenSubdir(path, kj::WriteMode::MODIFY);
server->setPyodideDiskCacheRoot(kj::mv(dir));
}

void watch() {
Expand Down
2 changes: 1 addition & 1 deletion src/workerd/tests/test-fixture.c++
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ class MockActorLoopback : public Worker::Actor::Loopback, public kj::Refcounted

using api::pyodide::PythonConfig;

PythonConfig defaultPythonConfig { .diskCacheRoot = kj::none, .createSnapshot = false, .createBaselineSnapshot = false };
PythonConfig defaultPythonConfig { .packageDiskCacheRoot = kj::none, .pyodideDiskCacheRoot = kj::none, .createSnapshot = false, .createBaselineSnapshot = false };

TestFixture::TestFixture(SetupParams&& params)
: waitScope(params.waitScope),
Expand Down
Loading