From 4ca398a6172731c907d41c5440727d0d272f123e Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Fri, 12 Aug 2022 15:39:39 +0800 Subject: [PATCH] src: iterate over base objects to prepare for snapshot Instead of iterating over the bindings, iterate over the base objects that are snapshottable. This allows us to snapshot base objects that are not bindings. In addition this refactors the InternalFieldInfo class to eliminate potential undefined behaviors, and renames it to InternalFieldInfoBase. The {de}serialize callbacks now expect a InternalFieldInfo struct nested in Snapshotable classes that can be used to carry serialization data around. This allows us to create structs inheriting from InternalFieldInfo for Snapshotable objects that need custom fields. PR-URL: https://github.com/nodejs/node/pull/44192 Refs: https://github.com/nodejs/node/issues/37476 Reviewed-By: Anna Henningsen Reviewed-By: Chengzhong Wu --- src/env-inl.h | 11 ----- src/env.cc | 13 +++--- src/env.h | 13 +++--- src/node_blob.cc | 22 +++++----- src/node_blob.h | 2 + src/node_file.cc | 12 ++++-- src/node_file.h | 1 + src/node_process.h | 2 + src/node_process_methods.cc | 12 ++++-- src/node_snapshotable.cc | 86 ++++++++++++++++++++----------------- src/node_snapshotable.h | 65 +++++++++++++++------------- src/node_v8.cc | 12 ++++-- src/node_v8.h | 4 +- 13 files changed, 139 insertions(+), 116 deletions(-) diff --git a/src/env-inl.h b/src/env-inl.h index f6b340d8149b88..360adffb25b06b 100644 --- a/src/env-inl.h +++ b/src/env-inl.h @@ -835,17 +835,6 @@ void Environment::ForEachBaseObject(T&& iterator) { } } -template -void Environment::ForEachBindingData(T&& iterator) { - BindingDataStore* map = static_cast( - context()->GetAlignedPointerFromEmbedderData( - ContextEmbedderIndex::kBindingListIndex)); - DCHECK_NOT_NULL(map); - for (auto& it : *map) { - iterator(it.first, it.second); - } -} - void Environment::modify_base_object_count(int64_t delta) { base_object_count_ += delta; } diff --git a/src/env.cc b/src/env.cc index 4ba976f21b4280..d92ab217acd26b 100644 --- a/src/env.cc +++ b/src/env.cc @@ -1669,7 +1669,6 @@ EnvSerializeInfo Environment::Serialize(SnapshotCreator* creator) { EnvSerializeInfo info; Local ctx = context(); - SerializeBindingData(this, creator, &info); // Currently all modules are compiled without cache in builtin snapshot // builder. info.builtins = std::vector(builtins_without_cache.begin(), @@ -1697,6 +1696,10 @@ EnvSerializeInfo Environment::Serialize(SnapshotCreator* creator) { ENVIRONMENT_STRONG_PERSISTENT_VALUES(V) #undef V + // Do this after other creator->AddData() calls so that Snapshotable objects + // can use 0 to indicate that a SnapshotIndex is invalid. + SerializeSnapshotableObjects(this, creator, &info); + info.context = creator->AddData(ctx, context()); return info; } @@ -1729,9 +1732,9 @@ std::ostream& operator<<(std::ostream& output, std::ostream& operator<<(std::ostream& output, const EnvSerializeInfo& i) { output << "{\n" - << "// -- bindings begins --\n" - << i.bindings << ",\n" - << "// -- bindings ends --\n" + << "// -- native_objects begins --\n" + << i.native_objects << ",\n" + << "// -- native_objects ends --\n" << "// -- builtins begins --\n" << i.builtins << ",\n" << "// -- builtins ends --\n" @@ -1758,7 +1761,7 @@ std::ostream& operator<<(std::ostream& output, const EnvSerializeInfo& i) { void Environment::EnqueueDeserializeRequest(DeserializeRequestCallback cb, Local holder, int index, - InternalFieldInfo* info) { + InternalFieldInfoBase* info) { DCHECK_EQ(index, BaseObject::kEmbedderType); DeserializeRequest request{cb, {isolate(), holder}, index, info}; deserialize_requests_.push_back(std::move(request)); diff --git a/src/env.h b/src/env.h index 8e466ad8da85bd..9b7a3d72594b92 100644 --- a/src/env.h +++ b/src/env.h @@ -956,19 +956,19 @@ class CleanupHookCallback { typedef void (*DeserializeRequestCallback)(v8::Local context, v8::Local holder, int index, - InternalFieldInfo* info); + InternalFieldInfoBase* info); struct DeserializeRequest { DeserializeRequestCallback cb; v8::Global holder; int index; - InternalFieldInfo* info = nullptr; // Owned by the request + InternalFieldInfoBase* info = nullptr; // Owned by the request // Move constructor DeserializeRequest(DeserializeRequest&& other) = default; }; struct EnvSerializeInfo { - std::vector bindings; + std::vector native_objects; std::vector builtins; AsyncHooks::SerializeInfo async_hooks; TickInfo::SerializeInfo tick_info; @@ -1062,7 +1062,7 @@ class Environment : public MemoryRetainer { void EnqueueDeserializeRequest(DeserializeRequestCallback cb, v8::Local holder, int index, - InternalFieldInfo* info); + InternalFieldInfoBase* info); void RunDeserializeRequests(); // Should be called before InitializeInspector() void InitializeDiagnostics(); @@ -1485,7 +1485,7 @@ class Environment : public MemoryRetainer { void RemoveUnmanagedFd(int fd); template - void ForEachBindingData(T&& iterator); + void ForEachBaseObject(T&& iterator); private: inline void ThrowError(v8::Local (*fun)(v8::Local), @@ -1643,9 +1643,6 @@ class Environment : public MemoryRetainer { std::function process_exit_handler_ { DefaultProcessExitHandler }; - template - void ForEachBaseObject(T&& iterator); - #define V(PropertyName, TypeName) v8::Global PropertyName ## _; ENVIRONMENT_STRONG_PERSISTENT_VALUES(V) #undef V diff --git a/src/node_blob.cc b/src/node_blob.cc index b2ca35ab3bb02f..d785aa60332d61 100644 --- a/src/node_blob.cc +++ b/src/node_blob.cc @@ -462,11 +462,10 @@ BlobBindingData::StoredDataObject BlobBindingData::get_data_object( return entry->second; } -void BlobBindingData::Deserialize( - Local context, - Local holder, - int index, - InternalFieldInfo* info) { +void BlobBindingData::Deserialize(Local context, + Local holder, + int index, + InternalFieldInfoBase* info) { DCHECK_EQ(index, BaseObject::kEmbedderType); HandleScope scope(context->GetIsolate()); Environment* env = Environment::GetCurrent(context); @@ -475,15 +474,18 @@ void BlobBindingData::Deserialize( CHECK_NOT_NULL(binding); } -void BlobBindingData::PrepareForSerialization( - Local context, - v8::SnapshotCreator* creator) { +bool BlobBindingData::PrepareForSerialization(Local context, + v8::SnapshotCreator* creator) { // Stored blob objects are not actually persisted. + // Return true because we need to maintain the reference to the binding from + // JS land. + return true; } -InternalFieldInfo* BlobBindingData::Serialize(int index) { +InternalFieldInfoBase* BlobBindingData::Serialize(int index) { DCHECK_EQ(index, BaseObject::kEmbedderType); - InternalFieldInfo* info = InternalFieldInfo::New(type()); + InternalFieldInfo* info = + InternalFieldInfoBase::New(type()); return info; } diff --git a/src/node_blob.h b/src/node_blob.h index 61f4da655c93cd..80e59a1c51c0bd 100644 --- a/src/node_blob.h +++ b/src/node_blob.h @@ -146,6 +146,8 @@ class BlobBindingData : public SnapshotableObject { public: explicit BlobBindingData(Environment* env, v8::Local wrap); + using InternalFieldInfo = InternalFieldInfoBase; + SERIALIZABLE_OBJECT_METHODS() static constexpr FastStringKey type_name{"node::BlobBindingData"}; diff --git a/src/node_file.cc b/src/node_file.cc index 5d3eebd1d6edfa..87d78542334554 100644 --- a/src/node_file.cc +++ b/src/node_file.cc @@ -2400,7 +2400,7 @@ BindingData::BindingData(Environment* env, v8::Local wrap) void BindingData::Deserialize(Local context, Local holder, int index, - InternalFieldInfo* info) { + InternalFieldInfoBase* info) { DCHECK_EQ(index, BaseObject::kEmbedderType); HandleScope scope(context->GetIsolate()); Environment* env = Environment::GetCurrent(context); @@ -2408,18 +2408,22 @@ void BindingData::Deserialize(Local context, CHECK_NOT_NULL(binding); } -void BindingData::PrepareForSerialization(Local context, +bool BindingData::PrepareForSerialization(Local context, v8::SnapshotCreator* creator) { CHECK(file_handle_read_wrap_freelist.empty()); // We'll just re-initialize the buffers in the constructor since their // contents can be thrown away once consumed in the previous call. stats_field_array.Release(); stats_field_bigint_array.Release(); + // Return true because we need to maintain the reference to the binding from + // JS land. + return true; } -InternalFieldInfo* BindingData::Serialize(int index) { +InternalFieldInfoBase* BindingData::Serialize(int index) { DCHECK_EQ(index, BaseObject::kEmbedderType); - InternalFieldInfo* info = InternalFieldInfo::New(type()); + InternalFieldInfo* info = + InternalFieldInfoBase::New(type()); return info; } diff --git a/src/node_file.h b/src/node_file.h index 45a1ad2e6ebaf4..f30bd5e8a58d97 100644 --- a/src/node_file.h +++ b/src/node_file.h @@ -23,6 +23,7 @@ class BindingData : public SnapshotableObject { std::vector> file_handle_read_wrap_freelist; + using InternalFieldInfo = InternalFieldInfoBase; SERIALIZABLE_OBJECT_METHODS() static constexpr FastStringKey type_name{"node::fs::BindingData"}; static constexpr EmbedderObjectType type_int = diff --git a/src/node_process.h b/src/node_process.h index 68956cb0ac3c60..7f88228e4c8163 100644 --- a/src/node_process.h +++ b/src/node_process.h @@ -50,6 +50,8 @@ class BindingData : public SnapshotableObject { void AddMethods(); static void RegisterExternalReferences(ExternalReferenceRegistry* registry); + using InternalFieldInfo = InternalFieldInfoBase; + SERIALIZABLE_OBJECT_METHODS() static constexpr FastStringKey type_name{"node::process::BindingData"}; static constexpr EmbedderObjectType type_int = diff --git a/src/node_process_methods.cc b/src/node_process_methods.cc index ae5f38ebf2c6b6..d47d0b2777ba94 100644 --- a/src/node_process_methods.cc +++ b/src/node_process_methods.cc @@ -524,23 +524,27 @@ void BindingData::SlowNumber(const v8::FunctionCallbackInfo& args) { NumberImpl(FromJSObject(args.Holder())); } -void BindingData::PrepareForSerialization(Local context, +bool BindingData::PrepareForSerialization(Local context, v8::SnapshotCreator* creator) { // It's not worth keeping. // Release it, we will recreate it when the instance is dehydrated. array_buffer_.Reset(); + // Return true because we need to maintain the reference to the binding from + // JS land. + return true; } -InternalFieldInfo* BindingData::Serialize(int index) { +InternalFieldInfoBase* BindingData::Serialize(int index) { DCHECK_EQ(index, BaseObject::kEmbedderType); - InternalFieldInfo* info = InternalFieldInfo::New(type()); + InternalFieldInfo* info = + InternalFieldInfoBase::New(type()); return info; } void BindingData::Deserialize(Local context, Local holder, int index, - InternalFieldInfo* info) { + InternalFieldInfoBase* info) { DCHECK_EQ(index, BaseObject::kEmbedderType); v8::HandleScope scope(context->GetIsolate()); Environment* env = Environment::GetCurrent(context); diff --git a/src/node_snapshotable.cc b/src/node_snapshotable.cc index a32ba76d316b25..f74e8331f35ed7 100644 --- a/src/node_snapshotable.cc +++ b/src/node_snapshotable.cc @@ -638,7 +638,7 @@ template <> EnvSerializeInfo FileReader::Read() { per_process::Debug(DebugCategory::MKSNAPSHOT, "Read()\n"); EnvSerializeInfo result; - result.bindings = ReadVector(); + result.native_objects = ReadVector(); result.builtins = ReadVector(); result.async_hooks = Read(); result.tick_info = Read(); @@ -661,7 +661,7 @@ size_t FileWriter::Write(const EnvSerializeInfo& data) { } // Use += here to ensure order of evaluation. - size_t written_total = WriteVector(data.bindings); + size_t written_total = WriteVector(data.native_objects); written_total += WriteVector(data.builtins); written_total += Write(data.async_hooks); written_total += Write(data.tick_info); @@ -1179,17 +1179,6 @@ const char* SnapshotableObject::GetTypeNameChars() const { } } -bool IsSnapshotableType(FastStringKey key) { -#define V(PropertyName, NativeTypeName) \ - if (key == NativeTypeName::type_name) { \ - return true; \ - } - SERIALIZABLE_OBJECT_TYPES(V) -#undef V - - return false; -} - void DeserializeNodeInternalFields(Local holder, int index, StartupData payload, @@ -1212,10 +1201,10 @@ void DeserializeNodeInternalFields(Local holder, DCHECK_EQ(index, BaseObject::kEmbedderType); Environment* env_ptr = static_cast(env); - const InternalFieldInfo* info = - reinterpret_cast(payload.data); + const InternalFieldInfoBase* info = + reinterpret_cast(payload.data); // TODO(joyeecheung): we can add a constant kNodeEmbedderId to the - // beginning of every InternalFieldInfo to ensure that we don't + // beginning of every InternalFieldInfoBase to ensure that we don't // step on payloads that were not serialized by Node.js. switch (info->type) { #define V(PropertyName, NativeTypeName) \ @@ -1225,12 +1214,25 @@ void DeserializeNodeInternalFields(Local holder, (*holder), \ NativeTypeName::type_name.c_str()); \ env_ptr->EnqueueDeserializeRequest( \ - NativeTypeName::Deserialize, holder, index, info->Copy()); \ + NativeTypeName::Deserialize, \ + holder, \ + index, \ + info->Copy()); \ break; \ } SERIALIZABLE_OBJECT_TYPES(V) #undef V - default: { UNREACHABLE(); } + default: { + // This should only be reachable during development when trying to + // deserialize a snapshot blob built by a version of Node.js that + // has more recognizable EmbedderObjectTypes than the deserializing + // Node.js binary. + fprintf(stderr, + "Unknown embedder object type %" PRIu8 ", possibly caused by " + "mismatched Node.js versions\n", + static_cast(info->type)); + ABORT(); + } } } @@ -1263,17 +1265,17 @@ StartupData SerializeNodeContextInternalFields(Local holder, static_cast(index), *holder); - void* binding_ptr = + void* native_ptr = holder->GetAlignedPointerFromInternalField(BaseObject::kSlot); - per_process::Debug(DebugCategory::MKSNAPSHOT, "binding = %p\n", binding_ptr); - DCHECK(static_cast(binding_ptr)->is_snapshotable()); - SnapshotableObject* obj = static_cast(binding_ptr); + per_process::Debug(DebugCategory::MKSNAPSHOT, "native = %p\n", native_ptr); + DCHECK(static_cast(native_ptr)->is_snapshotable()); + SnapshotableObject* obj = static_cast(native_ptr); per_process::Debug(DebugCategory::MKSNAPSHOT, "Object %p is %s, ", *holder, obj->GetTypeNameChars()); - InternalFieldInfo* info = obj->Serialize(index); + InternalFieldInfoBase* info = obj->Serialize(index); per_process::Debug(DebugCategory::MKSNAPSHOT, "payload size=%d\n", @@ -1282,31 +1284,35 @@ StartupData SerializeNodeContextInternalFields(Local holder, static_cast(info->length)}; } -void SerializeBindingData(Environment* env, - SnapshotCreator* creator, - EnvSerializeInfo* info) { +void SerializeSnapshotableObjects(Environment* env, + SnapshotCreator* creator, + EnvSerializeInfo* info) { uint32_t i = 0; - env->ForEachBindingData([&](FastStringKey key, - BaseObjectPtr binding) { + env->ForEachBaseObject([&](BaseObject* obj) { + // If there are any BaseObjects that are not snapshotable left + // during context serialization, V8 would crash due to unregistered + // global handles and print detailed information about them. + if (!obj->is_snapshotable()) { + return; + } + SnapshotableObject* ptr = static_cast(obj); + + const char* type_name = ptr->GetTypeNameChars(); per_process::Debug(DebugCategory::MKSNAPSHOT, - "Serialize binding %i (%p), object=%p, type=%s\n", + "Serialize snapshotable object %i (%p), " + "object=%p, type=%s\n", static_cast(i), - binding.get(), - *(binding->object()), - key.c_str()); + ptr, + *(ptr->object()), + type_name); - if (IsSnapshotableType(key)) { - SnapshotIndex index = creator->AddData(env->context(), binding->object()); + if (ptr->PrepareForSerialization(env->context(), creator)) { + SnapshotIndex index = creator->AddData(env->context(), obj->object()); per_process::Debug(DebugCategory::MKSNAPSHOT, "Serialized with index=%d\n", static_cast(index)); - info->bindings.push_back({key.c_str(), i, index}); - SnapshotableObject* ptr = static_cast(binding.get()); - ptr->PrepareForSerialization(env->context(), creator); - } else { - UNREACHABLE(); + info->native_objects.push_back({type_name, i, index}); } - i++; }); } diff --git a/src/node_snapshotable.h b/src/node_snapshotable.h index ec560b6d4fc633..f19a7e9c77f8e0 100644 --- a/src/node_snapshotable.h +++ b/src/node_snapshotable.h @@ -21,46 +21,54 @@ class ExternalReferenceRegistry; V(process_binding_data, process::BindingData) enum class EmbedderObjectType : uint8_t { - k_default = 0, #define V(PropertyName, NativeType) k_##PropertyName, SERIALIZABLE_OBJECT_TYPES(V) #undef V }; +typedef size_t SnapshotIndex; + // When serializing an embedder object, we'll serialize the native states -// into a chunk that can be mapped into a subclass of InternalFieldInfo, +// into a chunk that can be mapped into a subclass of InternalFieldInfoBase, // and pass it into the V8 callback as the payload of StartupData. // The memory chunk looks like this: // // [ type ] - EmbedderObjectType (a uint8_t) // [ length ] - a size_t // [ ... ] - custom bytes of size |length - header size| -struct InternalFieldInfo { +struct InternalFieldInfoBase { + public: EmbedderObjectType type; size_t length; - InternalFieldInfo() = delete; - - static InternalFieldInfo* New(EmbedderObjectType type) { - return New(type, sizeof(InternalFieldInfo)); - } - - static InternalFieldInfo* New(EmbedderObjectType type, size_t length) { - InternalFieldInfo* result = - reinterpret_cast(::operator new[](length)); + template + static T* New(EmbedderObjectType type) { + static_assert(std::is_base_of_v || + std::is_same_v, + "Can only accept InternalFieldInfoBase subclasses"); + void* buf = ::operator new[](sizeof(T)); + T* result = new (buf) T; result->type = type; - result->length = length; + result->length = sizeof(T); return result; } - InternalFieldInfo* Copy() const { - InternalFieldInfo* result = - reinterpret_cast(::operator new[](length)); - memcpy(result, this, length); + template + T* Copy() const { + static_assert(std::is_base_of_v || + std::is_same_v, + "Can only accept InternalFieldInfoBase subclasses"); + static_assert(std::is_trivially_copyable_v, + "Can only memcpy trivially copyable class"); + void* buf = ::operator new[](sizeof(T)); + T* result = new (buf) T; + memcpy(result, this, sizeof(T)); return result; } void Delete() { ::operator delete[](this); } + + InternalFieldInfoBase() = default; }; // An interface for snapshotable native objects to inherit from. @@ -73,7 +81,7 @@ struct InternalFieldInfo { // that needs a V8 context. // - Serialize(): This would be called during context serialization, // once for each embedder field of the object. -// Allocate and construct an InternalFieldInfo object that contains +// Allocate and construct an InternalFieldInfoBase object that contains // data that can be used to deserialize native states. // - Deserialize(): This would be called after the context is // deserialized and the object graph is complete, once for each @@ -83,12 +91,13 @@ class SnapshotableObject : public BaseObject { public: SnapshotableObject(Environment* env, v8::Local wrap, - EmbedderObjectType type = EmbedderObjectType::k_default); + EmbedderObjectType type); const char* GetTypeNameChars() const; - virtual void PrepareForSerialization(v8::Local context, + // If returns false, the object will not be serialized. + virtual bool PrepareForSerialization(v8::Local context, v8::SnapshotCreator* creator) = 0; - virtual InternalFieldInfo* Serialize(int index) = 0; + virtual InternalFieldInfoBase* Serialize(int index) = 0; bool is_snapshotable() const override { return true; } // We'll make sure that the type is set in the constructor EmbedderObjectType type() { return type_; } @@ -98,13 +107,13 @@ class SnapshotableObject : public BaseObject { }; #define SERIALIZABLE_OBJECT_METHODS() \ - void PrepareForSerialization(v8::Local context, \ + bool PrepareForSerialization(v8::Local context, \ v8::SnapshotCreator* creator) override; \ - InternalFieldInfo* Serialize(int index) override; \ + InternalFieldInfoBase* Serialize(int index) override; \ static void Deserialize(v8::Local context, \ v8::Local holder, \ int index, \ - InternalFieldInfo* info); + InternalFieldInfoBase* info); v8::StartupData SerializeNodeContextInternalFields(v8::Local holder, int index, @@ -113,11 +122,9 @@ void DeserializeNodeInternalFields(v8::Local holder, int index, v8::StartupData payload, void* env); -void SerializeBindingData(Environment* env, - v8::SnapshotCreator* creator, - EnvSerializeInfo* info); - -bool IsSnapshotableType(FastStringKey key); +void SerializeSnapshotableObjects(Environment* env, + v8::SnapshotCreator* creator, + EnvSerializeInfo* info); } // namespace node #endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS diff --git a/src/node_v8.cc b/src/node_v8.cc index e8bf3fc13a6b77..001d3464ec0e43 100644 --- a/src/node_v8.cc +++ b/src/node_v8.cc @@ -111,19 +111,22 @@ BindingData::BindingData(Environment* env, Local obj) .Check(); } -void BindingData::PrepareForSerialization(Local context, +bool BindingData::PrepareForSerialization(Local context, v8::SnapshotCreator* creator) { // We'll just re-initialize the buffers in the constructor since their // contents can be thrown away once consumed in the previous call. heap_statistics_buffer.Release(); heap_space_statistics_buffer.Release(); heap_code_statistics_buffer.Release(); + // Return true because we need to maintain the reference to the binding from + // JS land. + return true; } void BindingData::Deserialize(Local context, Local holder, int index, - InternalFieldInfo* info) { + InternalFieldInfoBase* info) { DCHECK_EQ(index, BaseObject::kEmbedderType); HandleScope scope(context->GetIsolate()); Environment* env = Environment::GetCurrent(context); @@ -131,9 +134,10 @@ void BindingData::Deserialize(Local context, CHECK_NOT_NULL(binding); } -InternalFieldInfo* BindingData::Serialize(int index) { +InternalFieldInfoBase* BindingData::Serialize(int index) { DCHECK_EQ(index, BaseObject::kEmbedderType); - InternalFieldInfo* info = InternalFieldInfo::New(type()); + InternalFieldInfo* info = + InternalFieldInfoBase::New(type()); return info; } diff --git a/src/node_v8.h b/src/node_v8.h index 5de8cb3f9c643c..18b3621a2a5d6a 100644 --- a/src/node_v8.h +++ b/src/node_v8.h @@ -11,13 +11,15 @@ namespace node { class Environment; -struct InternalFieldInfo; +struct InternalFieldInfoBase; namespace v8_utils { class BindingData : public SnapshotableObject { public: BindingData(Environment* env, v8::Local obj); + using InternalFieldInfo = InternalFieldInfoBase; + SERIALIZABLE_OBJECT_METHODS() static constexpr FastStringKey type_name{"node::v8::BindingData"}; static constexpr EmbedderObjectType type_int =