From 5f7a6a2811acf226560511b0c982909bd82d6d86 Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Sat, 28 Oct 2023 19:05:59 +0200 Subject: [PATCH 1/2] src: support configurable snapshot - Add support for --build-snapshot-config which allows passing snapshot configurations via a JSON configuration file. - Add support for node::SnapshotConfig in the embedder API The initial configurable options are: - "builder" (SnapshotConfig::builder_script_path): path to the builder script. - "withoutCodeCache" (SnapshotFlags::kWithoutCodeCache): disable code cache generation. --- src/api/embed_helpers.cc | 17 ++- src/env.cc | 17 ++- src/env.h | 16 ++- src/node.cc | 51 ++++++-- src/node.h | 34 ++++- src/node_internals.h | 1 + src/node_main_instance.cc | 2 - src/node_options.cc | 6 + src/node_options.h | 1 + src/node_sea.cc | 15 ++- src/node_snapshot_builder.h | 31 ++--- src/node_snapshotable.cc | 171 +++++++++++++++++++------- src/node_snapshotable.h | 4 +- test/embedding/embedtest.cc | 34 +++-- test/embedding/test-embedding.js | 4 +- test/parallel/test-snapshot-config.js | 138 +++++++++++++++++++++ tools/snapshot/node_mksnapshot.cc | 15 ++- 17 files changed, 454 insertions(+), 103 deletions(-) create mode 100644 test/parallel/test-snapshot-config.js diff --git a/src/api/embed_helpers.cc b/src/api/embed_helpers.cc index 341d131f24f753..6fac48d1b534d2 100644 --- a/src/api/embed_helpers.cc +++ b/src/api/embed_helpers.cc @@ -92,7 +92,8 @@ CommonEnvironmentSetup::CommonEnvironmentSetup( std::vector* errors, const EmbedderSnapshotData* snapshot_data, uint32_t flags, - std::function make_env) + std::function make_env, + const SnapshotConfig* snapshot_config) : impl_(new Impl()) { CHECK_NOT_NULL(platform); CHECK_NOT_NULL(errors); @@ -142,8 +143,7 @@ CommonEnvironmentSetup::CommonEnvironmentSetup( impl_->isolate_data.reset(CreateIsolateData( isolate, loop, platform, impl_->allocator.get(), snapshot_data)); - impl_->isolate_data->set_is_building_snapshot( - 
impl_->snapshot_creator.has_value()); + impl_->isolate_data->set_snapshot_config(snapshot_config); if (snapshot_data) { impl_->env.reset(make_env(this)); @@ -176,7 +176,8 @@ CommonEnvironmentSetup::CreateForSnapshotting( MultiIsolatePlatform* platform, std::vector* errors, const std::vector& args, - const std::vector& exec_args) { + const std::vector& exec_args, + const SnapshotConfig& snapshot_config) { // It's not guaranteed that a context that goes through // v8_inspector::V8Inspector::contextCreated() is runtime-independent, // so do not start the inspector on the main context when building @@ -196,7 +197,8 @@ CommonEnvironmentSetup::CreateForSnapshotting( args, exec_args, static_cast(env_flags)); - })); + }, + &snapshot_config)); if (!errors->empty()) ret.reset(); return ret; } @@ -240,10 +242,7 @@ EmbedderSnapshotData::Pointer CommonEnvironmentSetup::CreateSnapshot() { EmbedderSnapshotData::Pointer result{ new EmbedderSnapshotData(snapshot_data, true)}; - auto exit_code = SnapshotBuilder::CreateSnapshot( - snapshot_data, - this, - static_cast(SnapshotMetadata::Type::kFullyCustomized)); + auto exit_code = SnapshotBuilder::CreateSnapshot(snapshot_data, this); if (exit_code != ExitCode::kNoFailure) return {}; return result; diff --git a/src/env.cc b/src/env.cc index a429d5526d0af6..1b6d46aedfbb65 100644 --- a/src/env.cc +++ b/src/env.cc @@ -285,6 +285,12 @@ std::ostream& operator<<(std::ostream& output, return output; } +std::ostream& operator<<(std::ostream& output, const SnapshotFlags& flags) { + output << "static_cast(" << static_cast(flags) + << ")"; + return output; +} + std::ostream& operator<<(std::ostream& output, const SnapshotMetadata& i) { output << "{\n" << " " @@ -296,6 +302,7 @@ std::ostream& operator<<(std::ostream& output, const SnapshotMetadata& i) { << " \"" << i.node_arch << "\", // node_arch\n" << " \"" << i.node_platform << "\", // node_platform\n" << " " << i.v8_cache_version_tag << ", // v8_cache_version_tag\n" + << " " << i.flags << ", // 
flags\n" << "}"; return output; } @@ -806,8 +813,14 @@ Environment::Environment(IsolateData* isolate_data, isolate_data->worker_context()->env()->builtin_loader()); } else if (isolate_data->snapshot_data() != nullptr) { // ... otherwise, if a snapshot was provided, use its code cache. - builtin_loader()->RefreshCodeCache( - isolate_data->snapshot_data()->code_cache); + size_t cache_size = isolate_data->snapshot_data()->code_cache.size(); + per_process::Debug(DebugCategory::CODE_CACHE, + "snapshot contains %zu code cache\n", + cache_size); + if (cache_size > 0) { + builtin_loader()->RefreshCodeCache( + isolate_data->snapshot_data()->code_cache); + } } // We'll be creating new objects so make sure we've entered the context. diff --git a/src/env.h b/src/env.h index 1047f1d794da2a..c7a0fd1383f9f5 100644 --- a/src/env.h +++ b/src/env.h @@ -147,8 +147,15 @@ class NODE_EXTERN_PRIVATE IsolateData : public MemoryRetainer { void MemoryInfo(MemoryTracker* tracker) const override; IsolateDataSerializeInfo Serialize(v8::SnapshotCreator* creator); - bool is_building_snapshot() const { return is_building_snapshot_; } - void set_is_building_snapshot(bool value) { is_building_snapshot_ = value; } + bool is_building_snapshot() const { return snapshot_config_.has_value(); } + const SnapshotConfig* snapshot_config() const { + return snapshot_config_.has_value() ? &(snapshot_config_.value()) : nullptr; + } + void set_snapshot_config(const SnapshotConfig* config) { + if (config != nullptr) { + snapshot_config_ = *config; // Copy the config. 
+ } + } uint16_t* embedder_id_for_cppgc() const; uint16_t* embedder_id_for_non_cppgc() const; @@ -237,11 +244,13 @@ class NODE_EXTERN_PRIVATE IsolateData : public MemoryRetainer { uv_loop_t* const event_loop_; NodeArrayBufferAllocator* const node_allocator_; MultiIsolatePlatform* platform_; + const SnapshotData* snapshot_data_; + std::optional snapshot_config_; + std::unique_ptr cpp_heap_; std::shared_ptr options_; worker::Worker* worker_context_ = nullptr; - bool is_building_snapshot_ = false; PerIsolateWrapperData* wrapper_data_; static Mutex isolate_data_mutex_; @@ -526,6 +535,7 @@ struct SnapshotMetadata { std::string node_platform; // Result of v8::ScriptCompiler::CachedDataVersionTag(). uint32_t v8_cache_version_tag; + SnapshotFlags flags; }; struct SnapshotData { diff --git a/src/node.cc b/src/node.cc index 524f80ee69ee52..4e0c5036dbab46 100644 --- a/src/node.cc +++ b/src/node.cc @@ -1206,10 +1206,39 @@ ExitCode GenerateAndWriteSnapshotData(const SnapshotData** snapshot_data_ptr, // nullptr indicates there's no snapshot data. DCHECK_NULL(*snapshot_data_ptr); + SnapshotConfig snapshot_config; + const std::string& config_path = + per_process::cli_options->per_isolate->build_snapshot_config; + // For snapshot config read from JSON, we fix up process.argv[1] using the + // "builder" field. 
+ std::vector args_maybe_patched; + args_maybe_patched.reserve(result->args().size() + 1); + if (!config_path.empty()) { + std::optional optional_config = + ReadSnapshotConfig(config_path.c_str()); + if (!optional_config.has_value()) { + return ExitCode::kGenericUserError; + } + snapshot_config = std::move(optional_config.value()); + DCHECK(snapshot_config.builder_script_path.has_value()); + args_maybe_patched.emplace_back(result->args()[0]); + args_maybe_patched.emplace_back( + snapshot_config.builder_script_path.value()); + if (result->args().size() > 1) { + args_maybe_patched.insert(args_maybe_patched.end(), + result->args().begin() + 1, + result->args().end()); + } + } else { + snapshot_config.builder_script_path = result->args()[1]; + args_maybe_patched = result->args(); + } + DCHECK(snapshot_config.builder_script_path.has_value()); + const std::string& builder_script = + snapshot_config.builder_script_path.value(); // node:embedded_snapshot_main indicates that we are using the // embedded snapshot and we are not supposed to clean it up. - const std::string& main_script = result->args()[1]; - if (main_script == "node:embedded_snapshot_main") { + if (builder_script == "node:embedded_snapshot_main") { *snapshot_data_ptr = SnapshotBuilder::GetEmbeddedSnapshotData(); if (*snapshot_data_ptr == nullptr) { // The Node.js binary is built without embedded snapshot @@ -1221,24 +1250,25 @@ ExitCode GenerateAndWriteSnapshotData(const SnapshotData** snapshot_data_ptr, return exit_code; } } else { - // Otherwise, load and run the specified main script. + // Otherwise, load and run the specified builder script. std::unique_ptr generated_data = std::make_unique(); - std::string main_script_content; - int r = ReadFileSync(&main_script_content, main_script.c_str()); + std::string builder_script_content; + int r = ReadFileSync(&builder_script_content, builder_script.c_str()); if (r != 0) { FPrintF(stderr, - "Cannot read main script %s for building snapshot. 
%s: %s", - main_script, + "Cannot read builder script %s for building snapshot. %s: %s", + builder_script, uv_err_name(r), uv_strerror(r)); return ExitCode::kGenericUserError; } exit_code = node::SnapshotBuilder::Generate(generated_data.get(), - result->args(), + args_maybe_patched, result->exec_args(), - main_script_content); + builder_script_content, + snapshot_config); if (exit_code == ExitCode::kNoFailure) { *snapshot_data_ptr = generated_data.release(); } else { @@ -1368,7 +1398,8 @@ static ExitCode StartInternal(int argc, char** argv) { // --build-snapshot indicates that we are in snapshot building mode. if (per_process::cli_options->per_isolate->build_snapshot) { - if (result->args().size() < 2) { + if (per_process::cli_options->per_isolate->build_snapshot_config.empty() && + result->args().size() < 2) { fprintf(stderr, "--build-snapshot must be used with an entry point script.\n" "Usage: node --build-snapshot /path/to/entry.js\n"); diff --git a/src/node.h b/src/node.h index f2740116a4710b..bf3382f4c952ca 100644 --- a/src/node.h +++ b/src/node.h @@ -80,6 +80,7 @@ #include #include +#include #include // We cannot use __POSIX__ in this header because that's only defined when @@ -659,6 +660,33 @@ enum Flags : uint64_t { }; } // namespace EnvironmentFlags +enum class SnapshotFlags : uint32_t { + kDefault = 0, + // When set, code cache is not generated as part of the snapshot. + // Code cache reduces the time spent on compiling functions included + // in the snapshot at the expense of a bigger snapshot size and + // potentially breaking portability of the snapshot. + kWithoutCodeCache = 1 << 0, +}; + +struct SnapshotConfig { + SnapshotFlags flags = SnapshotFlags::kDefault; + + // When builder_script_path is std::nullopt, the snapshot is generated as a + // built-in snapshot instead of a custom one, and it's expected that the + // built-in snapshot only contains states that reproduce in every run of the + // application. 
The event loop won't be run when generating a built-in + // snapshot, so asynchronous operations should be avoided. + // + // When builder_script_path is an std::string, it should match args[1] + // passed to CreateForSnapshotting(). The embedder is also expected to use + // LoadEnvironment() to run a script matching this path. In that case the + // snapshot is generated as a custom snapshot and the event loop is run, so + // the snapshot builder can execute asynchronous operations as long as they + // are run to completion when the snapshot is taken. + std::optional builder_script_path; +}; + struct InspectorParentHandle { virtual ~InspectorParentHandle() = default; }; @@ -870,7 +898,8 @@ class NODE_EXTERN CommonEnvironmentSetup { MultiIsolatePlatform* platform, std::vector* errors, const std::vector& args = {}, - const std::vector& exec_args = {}); + const std::vector& exec_args = {}, + const SnapshotConfig& snapshot_config = {}); EmbedderSnapshotData::Pointer CreateSnapshot(); struct uv_loop_s* event_loop() const; @@ -905,7 +934,8 @@ class NODE_EXTERN CommonEnvironmentSetup { std::vector*, const EmbedderSnapshotData*, uint32_t flags, - std::function); + std::function, + const SnapshotConfig* config = nullptr); }; // Implementation for CommonEnvironmentSetup::Create diff --git a/src/node_internals.h b/src/node_internals.h index 9a96e042fc5cda..7a70063589b189 100644 --- a/src/node_internals.h +++ b/src/node_internals.h @@ -418,6 +418,7 @@ std::string Basename(const std::string& str, const std::string& extension); node_module napi_module_to_node_module(const napi_module* mod); +std::ostream& operator<<(std::ostream& output, const SnapshotFlags& flags); std::ostream& operator<<(std::ostream& output, const std::vector& v); std::ostream& operator<<(std::ostream& output, diff --git a/src/node_main_instance.cc b/src/node_main_instance.cc index e1e456cfad9325..64ab1375708c00 100644 --- a/src/node_main_instance.cc +++ b/src/node_main_instance.cc @@ -56,8 +56,6 @@ 
NodeMainInstance::NodeMainInstance(const SnapshotData* snapshot_data, platform, array_buffer_allocator_.get(), snapshot_data->AsEmbedderWrapper().get())); - isolate_data_->set_is_building_snapshot( - per_process::cli_options->per_isolate->build_snapshot); isolate_data_->max_young_gen_size = isolate_params_->constraints.max_young_generation_size_in_bytes(); diff --git a/src/node_options.cc b/src/node_options.cc index 9680ea6ed8312b..bb66895fef0402 100644 --- a/src/node_options.cc +++ b/src/node_options.cc @@ -850,6 +850,12 @@ PerIsolateOptionsParser::PerIsolateOptionsParser( "Generate a snapshot blob when the process exits.", &PerIsolateOptions::build_snapshot, kDisallowedInEnvvar); + AddOption("--build-snapshot-config", + "Generate a snapshot blob when the process exits using a " + "JSON configuration in the specified path.", + &PerIsolateOptions::build_snapshot_config, + kDisallowedInEnvvar); + Implies("--build-snapshot-config", "--build-snapshot"); Insert(eop, &PerIsolateOptions::get_per_env_options); } diff --git a/src/node_options.h b/src/node_options.h index af19dd612387ae..93bbd51d8b6e7c 100644 --- a/src/node_options.h +++ b/src/node_options.h @@ -235,6 +235,7 @@ class PerIsolateOptions : public Options { bool experimental_shadow_realm = false; std::string report_signal = "SIGUSR2"; bool build_snapshot = false; + std::string build_snapshot_config; inline EnvironmentOptions* get_per_env_options(); void CheckOptions(std::vector* errors, std::vector* argv) override; diff --git a/src/node_sea.cc b/src/node_sea.cc index d1ab5051032d76..c4fa84303034ff 100644 --- a/src/node_sea.cc +++ b/src/node_sea.cc @@ -377,14 +377,18 @@ std::optional ParseSingleExecutableConfig( ExitCode GenerateSnapshotForSEA(const SeaConfig& config, const std::vector& args, const std::vector& exec_args, - const std::string& main_script, + const std::string& builder_script_content, + const SnapshotConfig& snapshot_config, std::vector* snapshot_blob) { SnapshotData snapshot; // TODO(joyeecheung): 
make the arguments configurable through the JSON // config or a programmatic API. std::vector patched_args = {args[0], config.main_path}; - ExitCode exit_code = SnapshotBuilder::Generate( - &snapshot, patched_args, exec_args, main_script); + ExitCode exit_code = SnapshotBuilder::Generate(&snapshot, + patched_args, + exec_args, + builder_script_content, + snapshot_config); if (exit_code != ExitCode::kNoFailure) { return exit_code; } @@ -481,8 +485,11 @@ ExitCode GenerateSingleExecutableBlob( bool builds_snapshot_from_main = static_cast(config.flags & SeaFlags::kUseSnapshot); if (builds_snapshot_from_main) { + // TODO(joyeecheung): allow passing snapshot configuration in SEA configs. + SnapshotConfig snapshot_config; + snapshot_config.builder_script_path = main_script; ExitCode exit_code = GenerateSnapshotForSEA( - config, args, exec_args, main_script, &snapshot_blob); + config, args, exec_args, main_script, snapshot_config, &snapshot_blob); if (exit_code != ExitCode::kNoFailure) { return exit_code; } diff --git a/src/node_snapshot_builder.h b/src/node_snapshot_builder.h index 66768cfd201b5e..e2302946d1f8cb 100644 --- a/src/node_snapshot_builder.h +++ b/src/node_snapshot_builder.h @@ -16,20 +16,25 @@ namespace node { class ExternalReferenceRegistry; struct SnapshotData; +std::optional ReadSnapshotConfig(const char* path); + class NODE_EXTERN_PRIVATE SnapshotBuilder { public: - static ExitCode GenerateAsSource( - const char* out_path, + static ExitCode GenerateAsSource(const char* out_path, + const std::vector& args, + const std::vector& exec_args, + const SnapshotConfig& config, + bool use_array_literals = false); + + // Generate the snapshot into out. builder_script_content should match + // config.builder_script_path. This is passed separately + // in case the script is already read for other purposes. 
+ static ExitCode Generate( + SnapshotData* out, const std::vector& args, const std::vector& exec_args, - std::optional main_script_path = std::nullopt, - bool use_array_literals = false); - - // Generate the snapshot into out. - static ExitCode Generate(SnapshotData* out, - const std::vector& args, - const std::vector& exec_args, - std::optional main_script); + std::optional builder_script_content, + const SnapshotConfig& config); // If nullptr is returned, the binary is not built with embedded // snapshot. @@ -39,10 +44,8 @@ class NODE_EXTERN_PRIVATE SnapshotBuilder { static const std::vector& CollectExternalReferences(); - static ExitCode CreateSnapshot( - SnapshotData* out, - CommonEnvironmentSetup* setup, - /*SnapshotMetadata::Type*/ uint8_t snapshot_type); + static ExitCode CreateSnapshot(SnapshotData* out, + CommonEnvironmentSetup* setup); private: static std::unique_ptr registry_; diff --git a/src/node_snapshotable.cc b/src/node_snapshotable.cc index 71d64325765048..2d2f0192723ed9 100644 --- a/src/node_snapshotable.cc +++ b/src/node_snapshotable.cc @@ -10,6 +10,7 @@ #include "debug_utils-inl.h" #include "encoding_binding.h" #include "env-inl.h" +#include "json_parser.h" #include "node_blob.h" #include "node_builtins.h" #include "node_contextify.h" @@ -542,6 +543,7 @@ SnapshotMetadata SnapshotDeserializer::Read() { result.node_arch = ReadString(); result.node_platform = ReadString(); result.v8_cache_version_tag = ReadArithmetic(); + result.flags = static_cast(ReadArithmetic()); if (is_debug) { std::string str = ToStr(result); @@ -571,6 +573,9 @@ size_t SnapshotSerializer::Write(const SnapshotMetadata& data) { Debug("Write V8 cached data version tag %" PRIx32 "\n", data.v8_cache_version_tag); written_total += WriteArithmetic(data.v8_cache_version_tag); + Debug("Write snapshot flags %" PRIx32 "\n", + static_cast(data.flags)); + written_total += WriteArithmetic(static_cast(data.flags)); return written_total; } @@ -691,19 +696,21 @@ bool SnapshotData::Check() 
const { return false; } - uint32_t current_cache_version = v8::ScriptCompiler::CachedDataVersionTag(); - if (metadata.v8_cache_version_tag != current_cache_version && - metadata.type == SnapshotMetadata::Type::kFullyCustomized) { - // For now we only do this check for the customized snapshots - we know - // that the flags we use in the default snapshot are limited and safe - // enough so we can relax the constraints for it. - fprintf(stderr, - "Failed to load the startup snapshot because it was built with " - "a different version of V8 or with different V8 configurations.\n" - "Expected tag %" PRIx32 ", read %" PRIx32 "\n", - current_cache_version, - metadata.v8_cache_version_tag); - return false; + if (metadata.type == SnapshotMetadata::Type::kFullyCustomized && + !WithoutCodeCache(metadata.flags)) { + uint32_t current_cache_version = v8::ScriptCompiler::CachedDataVersionTag(); + if (metadata.v8_cache_version_tag != current_cache_version) { + // For now we only do this check for the customized snapshots - we know + // that the flags we use in the default snapshot are limited and safe + // enough so we can relax the constraints for it. + fprintf(stderr, + "Failed to load the startup snapshot because it was built with " + "a different version of V8 or with different V8 configurations.\n" + "Expected tag %" PRIx32 ", read %" PRIx32 "\n", + current_cache_version, + metadata.v8_cache_version_tag); + return false; + } } // TODO(joyeecheung): check incompatible Node.js flags. 
@@ -913,23 +920,91 @@ void SnapshotBuilder::InitializeIsolateParams(const SnapshotData* data, const_cast(&(data->v8_snapshot_blob_data)); } +SnapshotFlags operator|(SnapshotFlags x, SnapshotFlags y) { + return static_cast(static_cast(x) | + static_cast(y)); +} + +SnapshotFlags operator&(SnapshotFlags x, SnapshotFlags y) { + return static_cast(static_cast(x) & + static_cast(y)); +} + +SnapshotFlags operator|=(/* NOLINT (runtime/references) */ SnapshotFlags& x, + SnapshotFlags y) { + return x = x | y; +} + +bool WithoutCodeCache(const SnapshotFlags& flags) { + return static_cast(flags & SnapshotFlags::kWithoutCodeCache); +} + +bool WithoutCodeCache(const SnapshotConfig& config) { + return WithoutCodeCache(config.flags); +} + +std::optional ReadSnapshotConfig(const char* config_path) { + std::string config_content; + int r = ReadFileSync(&config_content, config_path); + if (r != 0) { + FPrintF(stderr, + "Cannot read snapshot configuration from %s: %s\n", + config_path, + uv_strerror(r)); + return std::nullopt; + } + + JSONParser parser; + if (!parser.Parse(config_content)) { + FPrintF(stderr, "Cannot parse JSON from %s\n", config_path); + return std::nullopt; + } + + SnapshotConfig result; + result.builder_script_path = parser.GetTopLevelStringField("builder"); + if (!result.builder_script_path.has_value()) { + FPrintF(stderr, + "\"builder\" field of %s is not a non-empty string\n", + config_path); + return std::nullopt; + } + + std::optional WithoutCodeCache = + parser.GetTopLevelBoolField("withoutCodeCache"); + if (!WithoutCodeCache.has_value()) { + FPrintF(stderr, + "\"withoutCodeCache\" field of %s is not a boolean\n", + config_path); + return std::nullopt; + } + if (WithoutCodeCache.value()) { + result.flags |= SnapshotFlags::kWithoutCodeCache; + } + + return result; +} + ExitCode BuildSnapshotWithoutCodeCache( SnapshotData* out, const std::vector& args, const std::vector& exec_args, - std::optional main_script) { + std::optional builder_script_content, + const 
SnapshotConfig& config) { + DCHECK(builder_script_content.has_value() == + config.builder_script_path.has_value()); // The default snapshot is meant to be runtime-independent and has more // restrictions. We do not enable the inspector and do not run the event // loop when building the default snapshot to avoid inconsistencies, but // we do for the fully customized one, and they are expected to fixup the // inconsistencies using v8.startupSnapshot callbacks. SnapshotMetadata::Type snapshot_type = - main_script.has_value() ? SnapshotMetadata::Type::kFullyCustomized - : SnapshotMetadata::Type::kDefault; + builder_script_content.has_value() + ? SnapshotMetadata::Type::kFullyCustomized + : SnapshotMetadata::Type::kDefault; std::vector errors; auto setup = CommonEnvironmentSetup::CreateForSnapshotting( - per_process::v8_platform.Platform(), &errors, args, exec_args); + per_process::v8_platform.Platform(), &errors, args, exec_args, config); if (!setup) { for (const std::string& err : errors) fprintf(stderr, "%s: %s\n", args[0].c_str(), err.c_str()); @@ -955,7 +1030,7 @@ ExitCode BuildSnapshotWithoutCodeCache( #if HAVE_INSPECTOR env->InitializeInspector({}); #endif - if (LoadEnvironment(env, main_script.value()).IsEmpty()) { + if (LoadEnvironment(env, builder_script_content.value()).IsEmpty()) { return ExitCode::kGenericUserError; } @@ -970,8 +1045,7 @@ ExitCode BuildSnapshotWithoutCodeCache( } } - return SnapshotBuilder::CreateSnapshot( - out, setup.get(), static_cast(snapshot_type)); + return SnapshotBuilder::CreateSnapshot(out, setup.get()); } ExitCode BuildCodeCacheFromSnapshot(SnapshotData* out, @@ -1015,28 +1089,32 @@ ExitCode SnapshotBuilder::Generate( SnapshotData* out, const std::vector& args, const std::vector& exec_args, - std::optional main_script) { - ExitCode code = - BuildSnapshotWithoutCodeCache(out, args, exec_args, main_script); + std::optional builder_script_content, + const SnapshotConfig& snapshot_config) { + ExitCode code = 
BuildSnapshotWithoutCodeCache( + out, args, exec_args, builder_script_content, snapshot_config); if (code != ExitCode::kNoFailure) { return code; } -#ifdef NODE_USE_NODE_CODE_CACHE - // Deserialize the snapshot to recompile code cache. We need to do this in the - // second pass because V8 requires the code cache to be compiled with a - // finalized read-only space. - return BuildCodeCacheFromSnapshot(out, args, exec_args); -#else + if (!WithoutCodeCache(snapshot_config)) { + // Deserialize the snapshot to recompile code cache. We need to do this in + // the second pass because V8 requires the code cache to be compiled with a + // finalized read-only space. + return BuildCodeCacheFromSnapshot(out, args, exec_args); + } + return ExitCode::kNoFailure; -#endif } ExitCode SnapshotBuilder::CreateSnapshot(SnapshotData* out, - CommonEnvironmentSetup* setup, - uint8_t snapshot_type_u8) { + CommonEnvironmentSetup* setup) { + const SnapshotConfig* config = setup->isolate_data()->snapshot_config(); + DCHECK_NOT_NULL(config); SnapshotMetadata::Type snapshot_type = - static_cast(snapshot_type_u8); + config->builder_script_path.has_value() + ? SnapshotMetadata::Type::kFullyCustomized + : SnapshotMetadata::Type::kDefault; Isolate* isolate = setup->isolate(); Environment* env = setup->env(); SnapshotCreator* creator = setup->snapshot_creator(); @@ -1099,8 +1177,10 @@ ExitCode SnapshotBuilder::CreateSnapshot(SnapshotData* out, } // Must be out of HandleScope - out->v8_snapshot_blob_data = - creator->CreateBlob(SnapshotCreator::FunctionCodeHandling::kKeep); + SnapshotCreator::FunctionCodeHandling handling = + WithoutCodeCache(*config) ? SnapshotCreator::FunctionCodeHandling::kClear + : SnapshotCreator::FunctionCodeHandling::kKeep; + out->v8_snapshot_blob_data = creator->CreateBlob(handling); // We must be able to rehash the blob when we restore it or otherwise // the hash seed would be fixed by V8, introducing a vulnerability. 
@@ -1112,7 +1192,8 @@ ExitCode SnapshotBuilder::CreateSnapshot(SnapshotData* out, per_process::metadata.versions.node, per_process::metadata.arch, per_process::metadata.platform, - v8::ScriptCompiler::CachedDataVersionTag()}; + v8::ScriptCompiler::CachedDataVersionTag(), + config->flags}; // We cannot resurrect the handles from the snapshot, so make sure that // no handles are left open in the environment after the blob is created @@ -1133,21 +1214,22 @@ ExitCode SnapshotBuilder::GenerateAsSource( const char* out_path, const std::vector& args, const std::vector& exec_args, - std::optional main_script_path, + const SnapshotConfig& config, bool use_array_literals) { - std::string main_script_content; - std::optional main_script_optional; - if (main_script_path.has_value()) { - int r = ReadFileSync(&main_script_content, main_script_path.value().data()); + std::string builder_script_content; + std::optional builder_script_optional; + if (config.builder_script_path.has_value()) { + std::string_view builder_script_path = config.builder_script_path.value(); + int r = ReadFileSync(&builder_script_content, builder_script_path.data()); if (r != 0) { FPrintF(stderr, "Cannot read main script %s for building snapshot. 
%s: %s", - main_script_path.value(), + builder_script_path, uv_err_name(r), uv_strerror(r)); return ExitCode::kGenericUserError; } - main_script_optional = main_script_content; + builder_script_optional = builder_script_content; } std::ofstream out(out_path, std::ios::out | std::ios::binary); @@ -1157,7 +1239,8 @@ ExitCode SnapshotBuilder::GenerateAsSource( } SnapshotData data; - ExitCode exit_code = Generate(&data, args, exec_args, main_script_optional); + ExitCode exit_code = + Generate(&data, args, exec_args, builder_script_optional, config); if (exit_code != ExitCode::kNoFailure) { return exit_code; } diff --git a/src/node_snapshotable.h b/src/node_snapshotable.h index d1f28ecf154d9b..5e281b8155c810 100644 --- a/src/node_snapshotable.h +++ b/src/node_snapshotable.h @@ -25,6 +25,9 @@ struct PropInfo { typedef size_t SnapshotIndex; +bool WithoutCodeCache(const SnapshotFlags& flags); +bool WithoutCodeCache(const SnapshotConfig& config); + // When serializing an embedder object, we'll serialize the native states // into a chunk that can be mapped into a subclass of InternalFieldInfoBase, // and pass it into the V8 callback as the payload of StartupData. 
@@ -154,7 +157,6 @@ class BindingData : public SnapshotableObject { AliasedUint8Array is_building_snapshot_buffer_; InternalFieldInfo* internal_field_info_ = nullptr; }; - } // namespace mksnapshot } // namespace node diff --git a/test/embedding/embedtest.cc b/test/embedding/embedtest.cc index 689891f0d1a5bf..43965b6056e455 100644 --- a/test/embedding/embedtest.cc +++ b/test/embedding/embedtest.cc @@ -68,6 +68,7 @@ int RunNodeInstance(MultiIsolatePlatform* platform, // --embedder-snapshot-blob blob-path // --embedder-snapshot-create // [--embedder-snapshot-as-file] + // [--without-code-cache] // Running snapshot: // embedtest --embedder-snapshot-blob blob-path // [--embedder-snapshot-as-file] @@ -80,6 +81,7 @@ int RunNodeInstance(MultiIsolatePlatform* platform, std::vector filtered_args; bool is_building_snapshot = false; bool snapshot_as_file = false; + std::optional snapshot_config; std::string snapshot_blob_path; for (size_t i = 0; i < args.size(); ++i) { const std::string& arg = args[i]; @@ -87,6 +89,13 @@ int RunNodeInstance(MultiIsolatePlatform* platform, is_building_snapshot = true; } else if (arg == "--embedder-snapshot-as-file") { snapshot_as_file = true; + } else if (arg == "--without-code-cache") { + if (!snapshot_config.has_value()) { + snapshot_config = node::SnapshotConfig{}; + } + snapshot_config.value().flags = static_cast( + static_cast(snapshot_config.value().flags) | + static_cast(node::SnapshotFlags::kWithoutCodeCache)); } else if (arg == "--embedder-snapshot-blob") { assert(i + 1 < args.size()); snapshot_blob_path = args[i + 1]; @@ -130,14 +139,23 @@ int RunNodeInstance(MultiIsolatePlatform* platform, } std::vector errors; - std::unique_ptr setup = - snapshot - ? CommonEnvironmentSetup::CreateFromSnapshot( - platform, &errors, snapshot.get(), filtered_args, exec_args) - : is_building_snapshot ? 
CommonEnvironmentSetup::CreateForSnapshotting( - platform, &errors, filtered_args, exec_args) - : CommonEnvironmentSetup::Create( - platform, &errors, filtered_args, exec_args); + std::unique_ptr setup; + + if (snapshot) { + setup = CommonEnvironmentSetup::CreateFromSnapshot( + platform, &errors, snapshot.get(), filtered_args, exec_args); + } else if (is_building_snapshot) { + if (snapshot_config.has_value()) { + setup = CommonEnvironmentSetup::CreateForSnapshotting( + platform, &errors, filtered_args, exec_args, snapshot_config.value()); + } else { + setup = CommonEnvironmentSetup::CreateForSnapshotting( + platform, &errors, filtered_args, exec_args); + } + } else { + setup = CommonEnvironmentSetup::Create( + platform, &errors, filtered_args, exec_args); + } if (!setup) { for (const std::string& err : errors) fprintf(stderr, "%s: %s\n", binary_path.c_str(), err.c_str()); diff --git a/test/embedding/test-embedding.js b/test/embedding/test-embedding.js index 1fb3bc73f494cb..97ff2377c22ca9 100644 --- a/test/embedding/test-embedding.js +++ b/test/embedding/test-embedding.js @@ -78,7 +78,9 @@ function getReadFileCodeForPath(path) { } // Basic snapshot support -for (const extraSnapshotArgs of [[], ['--embedder-snapshot-as-file']]) { +for (const extraSnapshotArgs of [ + [], ['--embedder-snapshot-as-file'], ['--without-code-cache'], +]) { // readSync + eval since snapshots don't support userland require() (yet) const snapshotFixture = fixtures.path('snapshot', 'echo-args.js'); const blobPath = tmpdir.resolve('embedder-snapshot.blob'); diff --git a/test/parallel/test-snapshot-config.js b/test/parallel/test-snapshot-config.js new file mode 100644 index 00000000000000..43dfda4af7f9a5 --- /dev/null +++ b/test/parallel/test-snapshot-config.js @@ -0,0 +1,138 @@ +'use strict'; + +// This tests --build-snapshot-config. 
+ +require('../common'); +const assert = require('assert'); +const { + spawnSyncAndExitWithoutError, + spawnSyncAndExit, +} = require('../common/child_process'); +const tmpdir = require('../common/tmpdir'); +const fixtures = require('../common/fixtures'); +const fs = require('fs'); + +const blobPath = tmpdir.resolve('snapshot.blob'); +const builderScript = fixtures.path('snapshot', 'mutate-fs.js'); +const checkFile = fixtures.path('snapshot', 'check-mutate-fs.js'); +const configPath = tmpdir.resolve('snapshot.json'); +tmpdir.refresh(); +{ + // Relative path. + spawnSyncAndExit(process.execPath, [ + '--snapshot-blob', + blobPath, + '--build-snapshot-config', + 'snapshot.json', + ], { + cwd: tmpdir.path + }, { + signal: null, + status: 1, + trim: true, + stderr: /Cannot read snapshot configuration from snapshot\.json/ + }); + + // Absolute path. + spawnSyncAndExit(process.execPath, [ + '--snapshot-blob', + blobPath, + '--build-snapshot-config', + configPath, + ], { + cwd: tmpdir.path + }, { + signal: null, + status: 1, + trim: true, + stderr: /Cannot read snapshot configuration from .+snapshot\.json/ + }); +} + +function writeConfig(config) { + fs.writeFileSync(configPath, JSON.stringify(config, null, 2), 'utf8'); +} + +{ + tmpdir.refresh(); + // Config without "builder" field should be rejected. + writeConfig({}); + spawnSyncAndExit(process.execPath, [ + '--snapshot-blob', + blobPath, + '--build-snapshot-config', + configPath, + ], { + cwd: tmpdir.path + }, { + signal: null, + status: 1, + trim: true, + stderr: /"builder" field of .+snapshot\.json is not a non-empty string/ + }); +} + +let sizeWithCache; +{ + tmpdir.refresh(); + // Create a working snapshot. 
+ writeConfig({ builder: builderScript }); + spawnSyncAndExitWithoutError(process.execPath, [ + '--snapshot-blob', + blobPath, + '--build-snapshot-config', + configPath, + ], { + cwd: tmpdir.path + }, {}); + const stats = fs.statSync(blobPath); + assert(stats.isFile()); + sizeWithCache = stats.size; + + // Check the snapshot. + spawnSyncAndExitWithoutError(process.execPath, [ + '--snapshot-blob', + blobPath, + checkFile, + ], { + cwd: tmpdir.path + }); +} + +let sizeWithoutCache; +{ + tmpdir.refresh(); + // Create a working snapshot. + writeConfig({ builder: builderScript, withoutCodeCache: true }); + spawnSyncAndExitWithoutError(process.execPath, [ + '--snapshot-blob', + blobPath, + '--build-snapshot-config', + configPath, + ], { + env: { + ...process.env, + NODE_DEBUG_NATIVE: 'CODE_CACHE' + }, + cwd: tmpdir.path + }, {}); + const stats = fs.statSync(blobPath); + assert(stats.isFile()); + sizeWithoutCache = stats.size; + assert(sizeWithoutCache < sizeWithCache, + `sizeWithoutCache = ${sizeWithoutCache} >= sizeWithCache ${sizeWithCache}`); + // Check the snapshot. 
+ spawnSyncAndExitWithoutError(process.execPath, [ + '--snapshot-blob', + blobPath, + checkFile, + ], { + cwd: tmpdir.path, + env: { + ...process.env, + NODE_DEBUG_NATIVE: 'CODE_CACHE' + }, + }, { + stderr: /snapshot contains 0 code cache/ + }); +} diff --git a/tools/snapshot/node_mksnapshot.cc b/tools/snapshot/node_mksnapshot.cc index 841a8ca743bcaa..b758b804017481 100644 --- a/tools/snapshot/node_mksnapshot.cc +++ b/tools/snapshot/node_mksnapshot.cc @@ -70,9 +70,9 @@ int BuildSnapshot(int argc, char* argv[]) { CHECK_EQ(result->exit_code(), 0); std::string out_path; - std::optional main_script_path = std::nullopt; + std::optional builder_script_path = std::nullopt; if (node::per_process::cli_options->per_isolate->build_snapshot) { - main_script_path = result->args()[1]; + builder_script_path = result->args()[1]; out_path = result->args()[2]; } else { out_path = result->args()[1]; @@ -84,11 +84,20 @@ int BuildSnapshot(int argc, char* argv[]) { bool use_array_literals = false; #endif + node::SnapshotConfig snapshot_config; + snapshot_config.builder_script_path = builder_script_path; + +#ifdef NODE_USE_NODE_CODE_CACHE + snapshot_config.flags = node::SnapshotFlags::kDefault; +#else + snapshot_config.flags = node::SnapshotFlags::kWithoutCodeCache; +#endif + node::ExitCode exit_code = node::SnapshotBuilder::GenerateAsSource(out_path.c_str(), result->args(), result->exec_args(), - main_script_path, + snapshot_config, use_array_literals); node::TearDownOncePerProcess(); From 0b6ab4cfe235ed15006b06803396120b77c798c7 Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Thu, 16 Nov 2023 15:47:28 +0100 Subject: [PATCH 2/2] doc: add documentation for --build-snapshot-config --- doc/api/cli.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/doc/api/cli.md b/doc/api/cli.md index 30b8b358d12973..e7997accba19d5 100644 --- a/doc/api/cli.md +++ b/doc/api/cli.md @@ -323,6 +323,30 @@ Currently the support for run-time snapshot is experimental in that: a 
report in the [Node.js issue tracker][] and link to it in the [tracking issue for user-land snapshots][]. +### `--build-snapshot-config` + + + +> Stability: 1 - Experimental + +Specifies the path to a JSON configuration file which configures snapshot +creation behavior. + +The following options are currently supported: + +* `builder` {string} Required. Provides the path to the script that is executed + before building the snapshot, as if [`--build-snapshot`][] had been passed + with `builder` as the main script name. +* `withoutCodeCache` {boolean} Optional. If `true`, code cache generation is disabled. Including the code cache reduces the + time spent on compiling functions included in the snapshot at the expense + of a bigger snapshot size and potentially breaking portability of the + snapshot. + +When using this flag, additional script files provided on the command line will +not be executed and will instead be interpreted as regular command line arguments. + ### `-c`, `--check`