Skip to content

Commit

Permalink
Merge branch 'main' into AdityaAtulTewari/http-request-cache-part1
Browse files Browse the repository at this point in the history
  • Loading branch information
AdityaAtulTewari committed Jul 31, 2024
2 parents 36072f8 + 8762c15 commit 8ee2cc9
Show file tree
Hide file tree
Showing 20 changed files with 255 additions and 130 deletions.
2 changes: 1 addition & 1 deletion build/pyodide_bucket.bzl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Do not edit this file by hand. See docs/pyodide.md for info on how to generate it.
# These variables are factored out here because they are being shared by the WORKSPACE files in
# both edgeworker and workerd, as well as src/pyodide/BUILD.bazel
PYODIDE_PACKAGE_BUCKET_URL = "https://pub-45d734c4145d4285b343833ee450ef38.r2.dev/20240513.2/"
PYODIDE_PACKAGE_BUCKET_URL = "https://pyodide-packages.runtime-playground.workers.dev/20240513.2/"
PYODIDE_GITHUB_RELEASE_URL = "https://github.com/cloudflare/pyodide-build-scripts/releases/download/20240513.2/"
PYODIDE_LOCK_SHA256 = "51eb3fd8dae5f551e2393ac58edfaf6a6c8d9c51b39c1584dd5d74bd7fb803fc"
PYODIDE_PACKAGES_TAR_ZIP_SHA256 = "b71d4c3cee3b6bd12969a788545f4159fb1eb984a7ca5de2493c4fa8479beeec"
Expand Down
9 changes: 7 additions & 2 deletions build/wd_js_bundle.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def _copy_modules(modules, declarations):
result[new_filename] = modules[m]
return result, declarations_result

def wd_js_bundle(
def wd_js_bundle_capnp(
name,
import_name,
schema_id,
Expand Down Expand Up @@ -146,6 +146,7 @@ def wd_js_bundle(
internal_json_modules: list of json source files
declarations: d.ts label set
deps: dependency list
Returns: The set of data dependencies
"""
builtin_modules_dict = {
m: "{}:{}".format(import_name, _to_name(m))
Expand Down Expand Up @@ -201,7 +202,7 @@ def wd_js_bundle(

gen_api_bundle_capnpn(
name = name + "@gen",
out = name + ".capnp",
out = name,
schema_id = schema_id,
const_name = import_name + "Bundle",
builtin_modules = builtin_modules_dict,
Expand All @@ -213,7 +214,11 @@ def wd_js_bundle(
data = data,
deps = deps,
)
return data


def wd_js_bundle(name, import_name, *args, **kwargs):
data = wd_js_bundle_capnp(name + ".capnp", import_name, *args, **kwargs)
cc_capnp_library(
name = name,
srcs = [name + ".capnp"],
Expand Down
21 changes: 18 additions & 3 deletions build/wd_ts_bundle.bzl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
load("@aspect_rules_ts//ts:defs.bzl", "ts_config", "ts_project")
load("@npm//:eslint/package_json.bzl", eslint_bin = "bin")
load("@workerd//:build/wd_js_bundle.bzl", "wd_js_bundle")
load("@workerd//:build/wd_js_bundle.bzl", "wd_js_bundle_capnp")
load("@capnp-cpp//src/capnp:cc_capnp_library.bzl", "cc_capnp_library")

def _to_js(file_name):
if file_name.endswith(".ts"):
Expand All @@ -10,7 +11,7 @@ def _to_js(file_name):
def _to_d_ts(file_name):
    """Returns the declaration-file name for `file_name`.

    A trailing ".ts" (if present) is dropped and ".d.ts" is appended.
    """
    stem = file_name.removesuffix(".ts")
    return stem + ".d.ts"

def wd_ts_bundle(
def wd_ts_bundle_capnp(
name,
import_name,
schema_id,
Expand Down Expand Up @@ -59,7 +60,7 @@ def wd_ts_bundle(
deps = deps,
)

wd_js_bundle(
data = wd_js_bundle_capnp(
name = name,
import_name = import_name,
# builtin modules are accessible under "<import_name>:<module_name>" name
Expand Down Expand Up @@ -96,3 +97,17 @@ def wd_ts_bundle(
"//conditions:default": [],
}),
)
return data


def wd_ts_bundle(name, import_name, *args, **kwargs):
    """Generates a TypeScript bundle schema and wraps it in a C++ Cap'n Proto library.

    Args:
      name: name of the resulting cc_capnp_library; the schema target is `name + ".capnp"`
      import_name: forwarded to wd_ts_bundle_capnp and used as the library's include_prefix
      *args: extra positional arguments forwarded to wd_ts_bundle_capnp
      **kwargs: extra keyword arguments forwarded to wd_ts_bundle_capnp
    """
    capnp_file = name + ".capnp"

    # The value returned by wd_ts_bundle_capnp is handed to the library as its `data`.
    bundle_data = wd_ts_bundle_capnp(capnp_file, import_name, *args, **kwargs)

    cc_capnp_library(
        name = name,
        srcs = [capnp_file],
        data = bundle_data,
        deps = ["@workerd//src/workerd/jsg:modules_capnp"],
        include_prefix = import_name,
        strip_include_prefix = "",
        visibility = ["//visibility:public"],
    )
37 changes: 34 additions & 3 deletions src/pyodide/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ load("@bazel_skylib//rules:write_file.bzl", "write_file")
load("@bazel_skylib//rules:expand_template.bzl", "expand_template")
load("@capnp-cpp//src/capnp:cc_capnp_library.bzl", "cc_capnp_library")
load("//:build/capnp_embed.bzl", "capnp_embed")
load("//:build/wd_ts_bundle.bzl", "wd_ts_bundle")
load("//:build/wd_ts_bundle.bzl", "wd_ts_bundle_capnp")

copy_file(
name = "pyodide_packages_archive",
Expand Down Expand Up @@ -146,8 +146,8 @@ expand_template(
template = "@pyodide//:pyodide/pyodide.asm.js",
)

wd_ts_bundle(
name = "pyodide",
data = wd_ts_bundle_capnp(
name = "pyodide.capnp",
modules = ["python-entrypoint-helper.ts"],
import_name = "pyodide",
internal_data_modules = ["generated/python_stdlib.zip"] + glob([
Expand Down Expand Up @@ -179,3 +179,34 @@ wd_ts_bundle(
"pyodide-bucket.json@rule",
],
)

# C++ Cap'n Proto library built from pyodide.capnp. `data` is the value returned by the
# wd_ts_bundle_capnp(...) call earlier in this file.
cc_capnp_library(
    name = "pyodide",
    srcs = ["pyodide.capnp"],
    data = data,
    deps = ["@workerd//src/workerd/jsg:modules_capnp"],
    include_prefix = "pyodide",
    strip_include_prefix = "",
    visibility = ["//visibility:public"],
)


# Produces pyodide.capnp.bin by running the capnp tool's `eval` command on the
# `pyodideBundle` symbol of pyodide.capnp with `-o binary`, redirecting stdout to the
# rule's single output.
genrule(
name = "pyodide.capnp.bin@rule",
tools = ["@capnp-cpp//src/capnp:capnp_tool"],
# The schema itself, the modules.capnp schema from jsg, and the bundle's data files.
srcs = ["pyodide.capnp", "//src/workerd/jsg:modules.capnp"] + data,
outs = ["pyodide.capnp.bin"],
visibility = ["//visibility:public"],
# The pieces below are joined with single spaces into one shell command line.
cmd = " ".join([
# Annoying logic to deal with different paths in workerd vs downstream.
# Either need "-I src" in workerd or -I external/workerd/src downstream
# `stat src` succeeding selects `src`; otherwise `external/workerd/src` is used.
"INCLUDE=$$(stat src > /dev/null 2>&1 && echo src || echo external/workerd/src);",
"$(execpath @capnp-cpp//src/capnp:capnp_tool)",
"eval",
"$(location :pyodide.capnp)",
"pyodideBundle",
"-I $$INCLUDE",
"-o binary",
"> $@",
])
)
16 changes: 15 additions & 1 deletion src/pyodide/internal/process_script_imports.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# This script is used to prepare a worker prior to a _package_ memory snapshot being taken.
# All it does is walk through the imports in each of the worker's modules and attempt to import
# them. Local imports are not possible because the worker file path is explicitly removed from the
# module search path.
CF_LOADED_MODULES=[]
def _do_it():
import ast
from pathlib import Path
import sys

def find_imports(source: str) -> list[str]:
try:
Expand All @@ -23,12 +29,20 @@ def process_script(script):
for mod in find_imports(script):
try:
__import__(mod)
CF_LOADED_MODULES.append(mod)
except ImportError:
pass

def process_scripts():
for script in Path("/session/metadata").glob("**/*.py"):
# Currently this script assumes that it is generating a _package_ snapshot: one that
# only includes non-vendored packages. Because of this we do not wish to import local
# modules; the easiest way to ensure they cannot be imported is to remove
# `/session/metadata` from `sys.path`.
worker_files_path = "/session/metadata"
sys.path.remove(worker_files_path)
for script in Path(worker_files_path).glob("**/*.py"):
process_script(script.read_text())
sys.path.append(worker_files_path)

process_scripts()

Expand Down
50 changes: 31 additions & 19 deletions src/pyodide/internal/snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ export async function uploadArtifacts(): Promise<void> {
/**
* Used to hold the memory that needs to be uploaded for the validator.
*/
let MEMORY_TO_UPLOAD: Uint8Array | undefined = undefined;
function getMemoryToUpload(): Uint8Array {
let MEMORY_TO_UPLOAD: ArtifactBundler.MemorySnapshotResult | undefined = undefined;
function getMemoryToUpload(): ArtifactBundler.MemorySnapshotResult {
if (!MEMORY_TO_UPLOAD) {
throw new TypeError("Expected MEMORY_TO_UPLOAD to be set");
}
Expand Down Expand Up @@ -238,14 +238,16 @@ const SNAPSHOT_IMPORTS = [
* If we are doing a baseline snapshot, just import everything from
* SNAPSHOT_IMPORTS. These will all succeed.
*
* If doing a script-specific "dedicated" snap shot, also try to import each
* user import.
* If doing a more dedicated "package" snapshot, also try to import each
* user import that refers to a non-vendored module.
*
* All of this is being done in the __main__ global scope, so be careful not to
* pollute it with extra included-by-default names (user code is executed in its
* own separate module scope though so it's not _that_ important).
*
* This function returns a list of modules that have been imported.
*/
function memorySnapshotDoImports(Module: Module): void {
function memorySnapshotDoImports(Module: Module): Array<string> {
const toImport = SNAPSHOT_IMPORTS.join(",");
const toDelete = Array.from(
new Set(SNAPSHOT_IMPORTS.map((x) => x.split(".", 1)[0])),
Expand All @@ -256,15 +258,24 @@ function memorySnapshotDoImports(Module: Module): void {
simpleRunPython(Module, `del ${toDelete}`);
if (IS_CREATING_BASELINE_SNAPSHOT) {
// We've done all the imports for the baseline snapshot.
return;
return [];
}
// Script-specific imports: collect all import nodes from user scripts and try
// to import them, catching and throwing away all failures.
// see process_script_imports.py.

// Process the Python modules in the user worker looking for imports of packages which are not
// vendored. Vendored packages are skipped because they may contain sensitive information which
// we do not want to include in the package snapshot.
//
// See process_script_imports.py.
const processScriptImportsString = new TextDecoder().decode(
new Uint8Array(processScriptImports),
);
simpleRunPython(Module, processScriptImportsString);

const importedModules: Array<string> = JSON.parse(simpleRunPython(
Module, "import sys, json; print(json.dumps(CF_LOADED_MODULES), file=sys.stderr)"
));

return importedModules;
}

function checkLoadedSoFiles(dsoJSON: DylinkInfo): void {
Expand All @@ -287,29 +298,29 @@ function checkLoadedSoFiles(dsoJSON: DylinkInfo): void {
* are initialized in the linear memory snapshot and then saving a copy of the
* linear memory into MEMORY.
*/
function makeLinearMemorySnapshot(Module: Module): Uint8Array {
memorySnapshotDoImports(Module);
function makeLinearMemorySnapshot(Module: Module): ArtifactBundler.MemorySnapshotResult {
const importedModulesList = memorySnapshotDoImports(Module);
const dsoJSON = recordDsoHandles(Module);
if (IS_CREATING_BASELINE_SNAPSHOT) {
// checkLoadedSoFiles(dsoJSON);
}
return encodeSnapshot(Module.HEAP8, dsoJSON);
return { snapshot: encodeSnapshot(Module.HEAP8, dsoJSON), importedModulesList };
}

function setUploadFunction(toUpload: Uint8Array): void {
if (toUpload.constructor.name !== "Uint8Array") {
function setUploadFunction(snapshot: Uint8Array, importedModulesList: Array<string>): void {
if (snapshot.constructor.name !== "Uint8Array") {
throw new TypeError("Expected TO_UPLOAD to be a Uint8Array");
}
if (TOP_LEVEL_SNAPSHOT) {
MEMORY_TO_UPLOAD = toUpload;
MEMORY_TO_UPLOAD = { snapshot, importedModulesList };
return;
}
DEFERRED_UPLOAD_FUNCTION = async () => {
try {
const success = await ArtifactBundler.uploadMemorySnapshot(toUpload);
const success = await ArtifactBundler.uploadMemorySnapshot(snapshot);
// Free memory
// @ts-ignore
toUpload = undefined;
snapshot = undefined;
if (!success) {
console.warn("Memory snapshot upload failed.");
}
Expand All @@ -324,7 +335,8 @@ export function maybeSetupSnapshotUpload(Module: Module): void {
if (!SHOULD_UPLOAD_SNAPSHOT) {
return;
}
setUploadFunction(makeLinearMemorySnapshot(Module));
const { snapshot, importedModulesList } = makeLinearMemorySnapshot(Module);
setUploadFunction(snapshot, importedModulesList);
}

// "\x00snp"
Expand Down Expand Up @@ -449,7 +461,7 @@ export function maybeStoreMemorySnapshot() {
if (ArtifactBundler.isEwValidating()) {
ArtifactBundler.storeMemorySnapshot(getMemoryToUpload());
} else if (SHOULD_SNAPSHOT_TO_DISK) {
DiskCache.put("snapshot.bin", getMemoryToUpload());
DiskCache.put("snapshot.bin", getMemoryToUpload().snapshot);
console.log("Saved snapshot to disk");
}
}
7 changes: 6 additions & 1 deletion src/pyodide/types/artifacts.d.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
declare namespace ArtifactBundler {
/**
 * A memory snapshot together with the names of the modules that were
 * imported while it was being prepared.
 */
type MemorySnapshotResult = {
  /** The encoded snapshot bytes. */
  snapshot: Uint8Array;
  /** Names of the modules that were imported. */
  importedModulesList: string[];
};

const hasMemorySnapshot: () => boolean;
const isEwValidating: () => boolean;
const isEnabled: () => boolean;
Expand All @@ -9,7 +14,7 @@ declare namespace ArtifactBundler {
) => void;
const getMemorySnapshotSize: () => number;
const disposeMemorySnapshot: () => void;
const storeMemorySnapshot: (snap: Uint8Array) => void;
const storeMemorySnapshot: (snap: MemorySnapshotResult) => void;
}

export default ArtifactBundler;
Loading

0 comments on commit 8ee2cc9

Please sign in to comment.