Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implements Python import statement parser to avoid ast module. #2685

Merged
merged 1 commit into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 0 additions & 57 deletions src/pyodide/internal/process_script_imports.py

This file was deleted.

2 changes: 0 additions & 2 deletions src/pyodide/internal/process_script_imports.py.d.ts

This file was deleted.

36 changes: 20 additions & 16 deletions src/pyodide/internal/snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ import {
getSitePackagesPath,
} from 'pyodide-internal:setupPackages';
import { default as TarReader } from 'pyodide-internal:packages_tar_reader';
import processScriptImports from 'pyodide-internal:process_script_imports.py';
import {
SHOULD_SNAPSHOT_TO_DISK,
IS_CREATING_BASELINE_SNAPSHOT,
MEMORY_SNAPSHOT_READER,
} from 'pyodide-internal:metadata';
import { reportError, simpleRunPython } from 'pyodide-internal:util';
import { default as MetadataReader } from 'pyodide-internal:runtime-generated/metadata';

let LOADED_BASELINE_SNAPSHOT: number;

Expand Down Expand Up @@ -261,24 +261,28 @@ function memorySnapshotDoImports(Module: Module): Array<string> {
return [];
}

// Process the Python modules in the user worker looking for imports of packages which are not
// vendored. Vendored packages are skipped because they may contain sensitive information which
// we do not want to include in the package snapshot.
//
// See process_script_imports.py.
const processScriptImportsString = new TextDecoder().decode(
new Uint8Array(processScriptImports)
// The `importedModules` list will contain all modules that have been imported, including local
// modules, the usual `js` and other stdlib modules. We want to filter out local imports, so we
// grab them and put them into a set for fast filtering.
const localModulePaths: Set<string> = new Set<string>(
MetadataReader.getNames()
);
simpleRunPython(Module, processScriptImportsString);
const importedModules: Array<string> = ArtifactBundler.constructor
// @ts-ignore parsePythonScriptImports is a static method.
.parsePythonScriptImports(MetadataReader.getWorkerFiles('py'))
.filter((module: string) => {
const moduleFilename = module.replace('.', '/') + '.py';
return !localModulePaths.has(moduleFilename) && module != 'js';
});

const importedModules: Array<string> = JSON.parse(
simpleRunPython(
Module,
'import sys, json; print(json.dumps(CF_LOADED_MODULES), file=sys.stderr)'
)
);
const deduplicatedModules = [...new Set(importedModules)];

// Import the modules list so they are included in the snapshot.
if (deduplicatedModules.length > 0) {
simpleRunPython(Module, 'import ' + deduplicatedModules.join(','));
}

return importedModules;
return deduplicatedModules;
}

function checkLoadedSoFiles(dsoJSON: DylinkInfo): void {
Expand Down
1 change: 1 addition & 0 deletions src/pyodide/types/FS.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ interface FS {
mode: number
): FSNode<Info>;
isFile: (mode: number) => boolean;
readdir: (path: string) => Array<string>;
genericErrors: Error[];
}

Expand Down
1 change: 1 addition & 0 deletions src/pyodide/types/runtime-generated/metadata.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ declare namespace MetadataReader {
const getMainModule: () => string;
const hasMemorySnapshot: () => boolean;
const getNames: () => string[];
const getWorkerFiles: (ext: string) => string[];
const getSizes: () => number[];
const readMemorySnapshot: (
offset: number,
Expand Down
8 changes: 8 additions & 0 deletions src/workerd/api/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,14 @@ wd_cc_library(
],
)

kj_test(
src = "pyodide/pyodide-test.c++",
deps = [
":pyodide",
"//src/workerd/tests:test-fixture",
],
)

wd_cc_library(
name = "data-url",
srcs = ["data-url.c++"],
Expand Down
118 changes: 118 additions & 0 deletions src/workerd/api/pyodide/pyodide-test.c++
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// Copyright (c) 2017-2022 Cloudflare, Inc.
// Licensed under the Apache 2.0 license found in the LICENSE file or at:
// https://opensource.org/licenses/Apache-2.0

#include "pyodide.h"
#include <kj/test.h>

namespace workerd::api {
namespace {

KJ_TEST("basic `import` tests") {
auto files = kj::heapArrayBuilder<kj::String>(2);
files.add(kj::str("import a\nimport z"));
files.add(kj::str("import b"));
auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish());
KJ_REQUIRE(result.size() == 3);
KJ_REQUIRE(result[0] == "a");
KJ_REQUIRE(result[1] == "z");
KJ_REQUIRE(result[2] == "b");
}

KJ_TEST("supports whitespace") {
auto files = kj::heapArrayBuilder<kj::String>(1);
files.add(kj::str("import a\nimport \n\tz"));
auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish());
KJ_REQUIRE(result.size() == 2);
KJ_REQUIRE(result[0] == "a");
KJ_REQUIRE(result[1] == "z");
}

KJ_TEST("supports windows newlines") {
auto files = kj::heapArrayBuilder<kj::String>(1);
files.add(kj::str("import a\r\nimport \r\n\tz"));
auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish());
KJ_REQUIRE(result.size() == 2);
KJ_REQUIRE(result[0] == "a");
KJ_REQUIRE(result[1] == "z");
}

KJ_TEST("basic `from` test") {
auto files = kj::heapArrayBuilder<kj::String>(1);
files.add(kj::str("from x import a,b\nfrom z import y"));
auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish());
KJ_REQUIRE(result.size() == 2);
KJ_REQUIRE(result[0] == "x");
KJ_REQUIRE(result[1] == "z");
}

KJ_TEST("ignores indented blocks") {
auto files = kj::heapArrayBuilder<kj::String>(1);
files.add(kj::str("import a\nif True:\n import x\nimport y"));
dom96 marked this conversation as resolved.
Show resolved Hide resolved
auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish());
KJ_REQUIRE(result.size() == 2);
KJ_REQUIRE(result[0] == "a");
KJ_REQUIRE(result[1] == "y");
}

KJ_TEST("supports nested imports") {
auto files = kj::heapArrayBuilder<kj::String>(1);
files.add(kj::str("import a.b\nimport z.x.y.i"));
auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish());
KJ_REQUIRE(result.size() == 2);
KJ_REQUIRE(result[0] == "a.b");
KJ_REQUIRE(result[1] == "z.x.y.i");
}

KJ_TEST("nested `from` test") {
auto files = kj::heapArrayBuilder<kj::String>(1);
files.add(kj::str("from x.y.z import a,b\nfrom z import y"));
auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish());
KJ_REQUIRE(result.size() == 2);
KJ_REQUIRE(result[0] == "x.y.z");
KJ_REQUIRE(result[1] == "z");
}

KJ_TEST("ignores trailing period") {
auto files = kj::heapArrayBuilder<kj::String>(1);
files.add(kj::str("import a.b.\nimport z.x.y.i."));
auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish());
KJ_REQUIRE(result.size() == 0);
}

KJ_TEST("ignores relative import") {
// This is where we diverge from the old AST-based approach. It would have returned `y` in the
// input below.
auto files = kj::heapArrayBuilder<kj::String>(1);
files.add(kj::str("import .a.b\nimport ..z.x\nfrom .y import x"));
dom96 marked this conversation as resolved.
Show resolved Hide resolved
auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish());
KJ_REQUIRE(result.size() == 0);
}

KJ_TEST("supports commas") {
auto files = kj::heapArrayBuilder<kj::String>(1);
files.add(kj::str("import a,b"));
auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish());
KJ_REQUIRE(result.size() == 2);
KJ_REQUIRE(result[0] == "a");
KJ_REQUIRE(result[1] == "b");
}

KJ_TEST("supports backslash") {
auto files = kj::heapArrayBuilder<kj::String>(4);
files.add(kj::str("import a\\\n,b"));
files.add(kj::str("import\\\n q,w"));
files.add(kj::str("from \\\nx import y"));
files.add(kj::str("from \\\n c import y"));
auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish());
KJ_REQUIRE(result.size() == 6);
KJ_REQUIRE(result[0] == "a");
KJ_REQUIRE(result[1] == "b");
KJ_REQUIRE(result[2] == "q");
KJ_REQUIRE(result[3] == "w");
KJ_REQUIRE(result[4] == "x");
KJ_REQUIRE(result[5] == "c");
}

} // namespace
} // namespace workerd::api
Loading
Loading