From 50fb2462f40bdab864341fbb726f6c27335d75bd Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Thu, 1 Dec 2022 09:26:59 -0500 Subject: [PATCH 1/3] deps: V8: cherry-pick bc831f8ba33b Original commit message: [fastcall] Implement support for onebyte string arguments This CL adds one byte string specialization support for fast API call arguments. It introduces a kOneByteString variant to CTypeInfo. We see a ~6x improvement in Deno's TextEncoder#encode microbenchmark. Rendered results: https://divy-v8-patches.deno.dev/ Bug: chromium:1052746 Change-Id: I47c3a9e101cd18ddc6ad58f627db3a34231b60f7 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4036884 Reviewed-by: Toon Verwaest Reviewed-by: Maya Lekova Commit-Queue: Maya Lekova Cr-Commit-Position: refs/heads/main@{#84552} Refs: https://github.com/v8/v8/commit/bc831f8ba33b79e2eb670faf1f84c4e39aeb0f9f --- common.gypi | 2 +- deps/v8/include/v8-fast-api-calls.h | 21 ++++- deps/v8/src/codegen/machine-type.h | 1 + .../src/compiler/effect-control-linearizer.cc | 45 ++++++++++ deps/v8/src/compiler/fast-api-calls.cc | 1 + deps/v8/src/compiler/simplified-lowering.cc | 1 + deps/v8/src/d8/d8-test.cc | 47 +++++++++++ .../mjsunit/compiler/fast-api-calls-string.js | 84 +++++++++++++++++++ 8 files changed, 200 insertions(+), 2 deletions(-) create mode 100644 deps/v8/test/mjsunit/compiler/fast-api-calls-string.js diff --git a/common.gypi b/common.gypi index c8ecf1ffae8a9b..5bc3d8666abc85 100644 --- a/common.gypi +++ b/common.gypi @@ -36,7 +36,7 @@ # Reset this number to 0 on major V8 upgrades. # Increment by one for each non-official patch applied to deps/v8. 
- 'v8_embedder_string': '-node.5', + 'v8_embedder_string': '-node.6', ##### V8 defaults for Node.js ##### diff --git a/deps/v8/include/v8-fast-api-calls.h b/deps/v8/include/v8-fast-api-calls.h index 1826f133210477..9ea43fe2535397 100644 --- a/deps/v8/include/v8-fast-api-calls.h +++ b/deps/v8/include/v8-fast-api-calls.h @@ -248,6 +248,7 @@ class CTypeInfo { kFloat32, kFloat64, kV8Value, + kSeqOneByteString, kApiObject, // This will be deprecated once all users have // migrated from v8::ApiObject to v8::Local. kAny, // This is added to enable untyped representation of fast @@ -379,6 +380,11 @@ struct FastApiArrayBuffer { size_t byte_length; }; +struct FastOneByteString { + const char* data; + uint32_t length; +}; + class V8_EXPORT CFunctionInfo { public: // Construct a struct to hold a CFunction's type information. @@ -438,6 +444,7 @@ struct AnyCType { const FastApiTypedArray* uint64_ta_value; const FastApiTypedArray* float_ta_value; const FastApiTypedArray* double_ta_value; + const FastOneByteString* string_value; FastApiCallbackOptions* options_value; }; }; @@ -614,7 +621,7 @@ class CFunctionInfoImpl : public CFunctionInfo { kReturnType == CTypeInfo::Type::kFloat32 || kReturnType == CTypeInfo::Type::kFloat64 || kReturnType == CTypeInfo::Type::kAny, - "64-bit int and api object values are not currently " + "64-bit int, string and api object values are not currently " "supported return types."); } @@ -735,6 +742,18 @@ struct TypeInfoHelper { } }; +template <> +struct TypeInfoHelper { + static constexpr CTypeInfo::Flags Flags() { return CTypeInfo::Flags::kNone; } + + static constexpr CTypeInfo::Type Type() { + return CTypeInfo::Type::kSeqOneByteString; + } + static constexpr CTypeInfo::SequenceType SequenceType() { + return CTypeInfo::SequenceType::kScalar; + } +}; + #define STATIC_ASSERT_IMPLIES(COND, ASSERTION, MSG) \ static_assert(((COND) == 0) || (ASSERTION), MSG) diff --git a/deps/v8/src/codegen/machine-type.h b/deps/v8/src/codegen/machine-type.h index 
29d7de75838e5e..38834d2394ce0d 100644 --- a/deps/v8/src/codegen/machine-type.h +++ b/deps/v8/src/codegen/machine-type.h @@ -315,6 +315,7 @@ class MachineType { case CTypeInfo::Type::kFloat64: return MachineType::Float64(); case CTypeInfo::Type::kV8Value: + case CTypeInfo::Type::kSeqOneByteString: case CTypeInfo::Type::kApiObject: return MachineType::AnyTagged(); } diff --git a/deps/v8/src/compiler/effect-control-linearizer.cc b/deps/v8/src/compiler/effect-control-linearizer.cc index 055e3484e72d99..66973abcbcf8fa 100644 --- a/deps/v8/src/compiler/effect-control-linearizer.cc +++ b/deps/v8/src/compiler/effect-control-linearizer.cc @@ -5395,6 +5395,50 @@ Node* EffectControlLinearizer::AdaptFastCallArgument( case CTypeInfo::Type::kFloat32: { return __ TruncateFloat64ToFloat32(node); } + case CTypeInfo::Type::kSeqOneByteString: { + // Check that the value is a HeapObject. + Node* value_is_smi = ObjectIsSmi(node); + __ GotoIf(value_is_smi, if_error); + + Node* map = __ LoadField(AccessBuilder::ForMap(), node); + Node* instance_type = + __ LoadField(AccessBuilder::ForMapInstanceType(), map); + + Node* encoding = __ Word32And( + instance_type, + __ Int32Constant(kStringRepresentationAndEncodingMask)); + + Node* is_onebytestring = __ Word32Equal( + encoding, __ Int32Constant(kSeqOneByteStringTag)); + __ GotoIfNot(is_onebytestring, if_error); + + Node* length_in_bytes = + __ LoadField(AccessBuilder::ForStringLength(), node); + Node* data_ptr = __ IntPtrAdd( + node, __ IntPtrConstant(SeqOneByteString::kHeaderSize - + kHeapObjectTag)); + + constexpr int kAlign = alignof(FastOneByteString); + constexpr int kSize = sizeof(FastOneByteString); + static_assert(kSize == sizeof(uintptr_t) + sizeof(size_t), + "The size of " + "FastOneByteString isn't equal to the sum of its " + "expected members."); + Node* stack_slot = __ StackSlot(kSize, kAlign); + + __ Store(StoreRepresentation(MachineType::PointerRepresentation(), + kNoWriteBarrier), + stack_slot, 0, data_ptr); + __ 
Store(StoreRepresentation(MachineRepresentation::kWord32, + kNoWriteBarrier), + stack_slot, sizeof(size_t), length_in_bytes); + + static_assert(sizeof(uintptr_t) == sizeof(size_t), + "The string length can't " + "fit the PointerRepresentation used to store it."); + + return stack_slot; + } default: { return node; } @@ -5600,6 +5644,7 @@ Node* EffectControlLinearizer::LowerFastApiCall(Node* node) { case CTypeInfo::Type::kFloat64: return ChangeFloat64ToTagged( c_call_result, CheckForMinusZeroMode::kCheckForMinusZero); + case CTypeInfo::Type::kSeqOneByteString: case CTypeInfo::Type::kV8Value: case CTypeInfo::Type::kApiObject: case CTypeInfo::Type::kUint8: diff --git a/deps/v8/src/compiler/fast-api-calls.cc b/deps/v8/src/compiler/fast-api-calls.cc index 4e3f92a99e5c27..4279048ee17f50 100644 --- a/deps/v8/src/compiler/fast-api-calls.cc +++ b/deps/v8/src/compiler/fast-api-calls.cc @@ -29,6 +29,7 @@ ElementsKind GetTypedArrayElementsKind(CTypeInfo::Type type) { case CTypeInfo::Type::kFloat64: return FLOAT64_ELEMENTS; case CTypeInfo::Type::kVoid: + case CTypeInfo::Type::kSeqOneByteString: case CTypeInfo::Type::kBool: case CTypeInfo::Type::kV8Value: case CTypeInfo::Type::kApiObject: diff --git a/deps/v8/src/compiler/simplified-lowering.cc b/deps/v8/src/compiler/simplified-lowering.cc index 17200099caf5af..2d4766380995b6 100644 --- a/deps/v8/src/compiler/simplified-lowering.cc +++ b/deps/v8/src/compiler/simplified-lowering.cc @@ -1961,6 +1961,7 @@ class RepresentationSelector { case CTypeInfo::Type::kFloat64: return UseInfo::CheckedNumberAsFloat64(kDistinguishZeros, feedback); case CTypeInfo::Type::kV8Value: + case CTypeInfo::Type::kSeqOneByteString: case CTypeInfo::Type::kApiObject: return UseInfo::AnyTagged(); } diff --git a/deps/v8/src/d8/d8-test.cc b/deps/v8/src/d8/d8-test.cc index 068a2b2329891c..a4d5abdc442df3 100644 --- a/deps/v8/src/d8/d8-test.cc +++ b/deps/v8/src/d8/d8-test.cc @@ -42,6 +42,43 @@ class FastCApiObject { public: static FastCApiObject& instance(); 
+#ifdef V8_USE_SIMULATOR_WITH_GENERIC_C_CALLS + static AnyCType CopyStringFastCallbackPatch(AnyCType receiver, + AnyCType should_fallback, + AnyCType source, AnyCType out, + AnyCType options) { + AnyCType ret; + CopyStringFastCallback(receiver.object_value, should_fallback.bool_value, + *source.string_value, *out.uint8_ta_value, + *options.options_value); + return ret; + } + +#endif // V8_USE_SIMULATOR_WITH_GENERIC_C_CALLS + static void CopyStringFastCallback(Local receiver, + bool should_fallback, + const FastOneByteString& source, + const FastApiTypedArray& out, + FastApiCallbackOptions& options) { + FastCApiObject* self = UnwrapObject(receiver); + self->fast_call_count_++; + + if (should_fallback) { + options.fallback = true; + } else { + options.fallback = false; + } + + uint8_t* memory = nullptr; + CHECK(out.getStorageIfAligned(&memory)); + memcpy(memory, source.data, source.length); + } + + static void CopyStringSlowCallback(const FunctionCallbackInfo& args) { + FastCApiObject* self = UnwrapObject(args.This()); + CHECK_SELF_OR_THROW(); + self->slow_call_count_++; + } #ifdef V8_USE_SIMULATOR_WITH_GENERIC_C_CALLS static AnyCType AddAllFastCallbackPatch(AnyCType receiver, AnyCType should_fallback, @@ -1084,6 +1121,16 @@ Local Shell::CreateTestFastCApiTemplate(Isolate* isolate) { PerIsolateData::Get(isolate)->SetTestApiObjectCtor(api_obj_ctor); Local signature = Signature::New(isolate, api_obj_ctor); { + CFunction copy_str_func = CFunction::Make( + FastCApiObject::CopyStringFastCallback V8_IF_USE_SIMULATOR( + FastCApiObject::CopyStringFastCallbackPatch)); + api_obj_ctor->PrototypeTemplate()->Set( + isolate, "copy_string", + FunctionTemplate::New(isolate, FastCApiObject::CopyStringSlowCallback, + Local(), signature, 1, + ConstructorBehavior::kThrow, + SideEffectType::kHasSideEffect, ©_str_func)); + CFunction add_all_c_func = CFunction::Make(FastCApiObject::AddAllFastCallback V8_IF_USE_SIMULATOR( FastCApiObject::AddAllFastCallbackPatch)); diff --git 
a/deps/v8/test/mjsunit/compiler/fast-api-calls-string.js b/deps/v8/test/mjsunit/compiler/fast-api-calls-string.js new file mode 100644 index 00000000000000..70e33c9a2d5f33 --- /dev/null +++ b/deps/v8/test/mjsunit/compiler/fast-api-calls-string.js @@ -0,0 +1,84 @@ +// Copyright 2022 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// This file excercises one byte string support for fast API calls. + +// Flags: --turbo-fast-api-calls --expose-fast-api --allow-natives-syntax --turbofan +// --always-turbofan is disabled because we rely on particular feedback for +// optimizing to the fastest path. +// Flags: --no-always-turbofan +// The test relies on optimizing/deoptimizing at predictable moments, so +// it's not suitable for deoptimization fuzzing. +// Flags: --deopt-every-n-times=0 + +assertThrows(() => d8.test.FastCAPI()); +const fast_c_api = new d8.test.FastCAPI(); + +function assertSlowCall(input) { + assertEquals(new Uint8Array(input.length), copy_string(false, input)); +} + +function assertFastCall(input) { + const bytes = Uint8Array.from(input, c => c.charCodeAt(0)); + assertEquals(bytes, copy_string(false, input)); +} + +function copy_string(should_fallback = false, input) { + const buffer = new Uint8Array(input.length); + fast_c_api.copy_string(should_fallback, input, buffer); + return buffer; +} + +%PrepareFunctionForOptimization(copy_string); +assertSlowCall('Hello'); +%OptimizeFunctionOnNextCall(copy_string); + +fast_c_api.reset_counts(); +assertFastCall('Hello'); +assertFastCall(''); +assertFastCall(['Hello', 'World'].join('')); +assertOptimized(copy_string); +assertEquals(3, fast_c_api.fast_call_count()); +assertEquals(0, fast_c_api.slow_call_count()); + +// Fall back for twobyte strings. 
+fast_c_api.reset_counts(); +assertSlowCall('Hello\u{10000}'); +assertSlowCall('नमस्ते'); +assertSlowCall(['नमस्ते', 'World'].join('')); +assertOptimized(copy_string); +assertEquals(0, fast_c_api.fast_call_count()); +assertEquals(3, fast_c_api.slow_call_count()); + +// Fall back for cons strings. +function getTwoByteString() { + return '\u1234t'; +} +function getCons() { + return 'hello' + getTwoByteString() +} + +fast_c_api.reset_counts(); +assertSlowCall(getCons()); +assertOptimized(copy_string); +assertEquals(0, fast_c_api.fast_call_count()); +assertEquals(1, fast_c_api.slow_call_count()); + +// Fall back for sliced strings. +fast_c_api.reset_counts(); +function getSliced() { + return getCons().slice(1); +} +assertSlowCall(getSliced()); +assertOptimized(copy_string); +assertEquals(0, fast_c_api.fast_call_count()); +assertEquals(1, fast_c_api.slow_call_count()); + +// Fall back for SMI and non-string inputs. +fast_c_api.reset_counts(); +assertSlowCall(1); +assertSlowCall({}); +assertSlowCall(new Uint8Array(1)); +assertEquals(0, fast_c_api.fast_call_count()); +assertEquals(3, fast_c_api.slow_call_count()); From 76017588f645e07675507eab829509a13bc429ec Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Fri, 2 Dec 2022 10:10:20 -0500 Subject: [PATCH 2/3] src: add encoding_methods with fast api Co-authored-by: Anna Henningsen --- benchmark/util/text-encoder.js | 20 +++- lib/internal/bootstrap/loaders.js | 1 + lib/internal/encoding.js | 11 +- node.gyp | 1 + src/node_binding.cc | 1 + src/node_buffer.cc | 73 ------------ src/node_encoding.cc | 150 ++++++++++++++++++++++++ src/node_external_reference.h | 8 ++ test/parallel/test-bootstrap-modules.js | 1 + test/parallel/test-util-text-encoder.js | 50 ++++++++ 10 files changed, 234 insertions(+), 82 deletions(-) create mode 100644 src/node_encoding.cc create mode 100644 test/parallel/test-util-text-encoder.js diff --git a/benchmark/util/text-encoder.js b/benchmark/util/text-encoder.js index ca3cb827779be3..e3c900615a40a0 
100644 --- a/benchmark/util/text-encoder.js +++ b/benchmark/util/text-encoder.js @@ -2,17 +2,27 @@ const common = require('../common.js'); -const BASE = 'string\ud801'; - const bench = common.createBenchmark(main, { - len: [256, 1024, 1024 * 32], + len: [0, 256, 1024, 1024 * 32], n: [1e4], + type: ['v8-one-byte-string', 'v8-two-byte-string'], op: ['encode', 'encodeInto'] }); -function main({ n, op, len }) { +function main({ n, op, len, type }) { const encoder = new TextEncoder(); - const input = BASE.repeat(len); + let base = ''; + + switch (type) { + case 'v8-one-byte-string': + base = 'a'; + break; + case 'v8-two-byte-string': + base = 'ğ'; + break; + } + + const input = base.repeat(len); const subarray = new Uint8Array(len); bench.start(); diff --git a/lib/internal/bootstrap/loaders.js b/lib/internal/bootstrap/loaders.js index f25fb7c9f44391..1782511fecf6d8 100644 --- a/lib/internal/bootstrap/loaders.js +++ b/lib/internal/bootstrap/loaders.js @@ -85,6 +85,7 @@ const internalBindingAllowlist = new SafeSet([ 'constants', 'contextify', 'crypto', + 'encoding_methods', 'fs', 'fs_event_wrap', 'http_parser', diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index 0e3c44d2e84fbb..1205cf3ba8e5b9 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -51,11 +51,14 @@ const { } = require('internal/validators'); const { - encodeInto, - encodeUtf8String, decodeUTF8, } = internalBinding('buffer'); +const { + encodeUtf8, + encodeIntoUtf8, +} = internalBinding('encoding_methods'); + let Buffer; function lazyBuffer() { if (Buffer === undefined) @@ -337,7 +340,7 @@ class TextEncoder { encode(input = '') { validateEncoder(this); - return encodeUtf8String(`${input}`); + return encodeUtf8(`${input}`); } encodeInto(src, dest) { @@ -345,7 +348,7 @@ class TextEncoder { validateString(src, 'src'); if (!dest || !isUint8Array(dest)) throw new ERR_INVALID_ARG_TYPE('dest', 'Uint8Array', dest); - encodeInto(src, dest, encodeIntoResults); + encodeIntoUtf8(src, 
dest, encodeIntoResults); return { read: encodeIntoResults[0], written: encodeIntoResults[1] }; } diff --git a/node.gyp b/node.gyp index 448cb8a8c7cd49..0b899907789ef0 100644 --- a/node.gyp +++ b/node.gyp @@ -498,6 +498,7 @@ 'src/node_contextify.cc', 'src/node_credentials.cc', 'src/node_dir.cc', + 'src/node_encoding.cc', 'src/node_env_var.cc', 'src/node_errors.cc', 'src/node_external_reference.cc', diff --git a/src/node_binding.cc b/src/node_binding.cc index ab25501dcbae96..463acba163e1e4 100644 --- a/src/node_binding.cc +++ b/src/node_binding.cc @@ -43,6 +43,7 @@ V(contextify) \ V(credentials) \ V(errors) \ + V(encoding_methods) \ V(fs) \ V(fs_dir) \ V(fs_event_wrap) \ diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 0f3b048272fbe7..aec33ca6a59555 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -1146,73 +1146,6 @@ void Swap64(const FunctionCallbackInfo& args) { args.GetReturnValue().Set(args[0]); } - -// Encode a single string to a UTF-8 Uint8Array (not Buffer). -// Used in TextEncoder.prototype.encode. 
-static void EncodeUtf8String(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - Isolate* isolate = env->isolate(); - CHECK_GE(args.Length(), 1); - CHECK(args[0]->IsString()); - - Local str = args[0].As(); - size_t length = str->Utf8Length(isolate); - - Local ab; - { - NoArrayBufferZeroFillScope no_zero_fill_scope(env->isolate_data()); - std::unique_ptr bs = - ArrayBuffer::NewBackingStore(isolate, length); - - CHECK(bs); - - str->WriteUtf8(isolate, - static_cast(bs->Data()), - -1, // We are certain that `data` is sufficiently large - nullptr, - String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8); - - ab = ArrayBuffer::New(isolate, std::move(bs)); - } - - auto array = Uint8Array::New(ab, 0, length); - args.GetReturnValue().Set(array); -} - - -static void EncodeInto(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - Isolate* isolate = env->isolate(); - CHECK_GE(args.Length(), 3); - CHECK(args[0]->IsString()); - CHECK(args[1]->IsUint8Array()); - CHECK(args[2]->IsUint32Array()); - - Local source = args[0].As(); - - Local dest = args[1].As(); - Local buf = dest->Buffer(); - char* write_result = static_cast(buf->Data()) + dest->ByteOffset(); - size_t dest_length = dest->ByteLength(); - - // results = [ read, written ] - Local result_arr = args[2].As(); - uint32_t* results = reinterpret_cast( - static_cast(result_arr->Buffer()->Data()) + - result_arr->ByteOffset()); - - int nchars; - int written = source->WriteUtf8( - isolate, - write_result, - dest_length, - &nchars, - String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8); - results[0] = nchars; - results[1] = written; -} - - void SetBufferPrototype(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); @@ -1344,9 +1277,6 @@ void Initialize(Local target, SetMethod(context, target, "swap32", Swap32); SetMethod(context, target, "swap64", Swap64); - SetMethod(context, target, "encodeInto", EncodeInto); 
- SetMethodNoSideEffect(context, target, "encodeUtf8String", EncodeUtf8String); - target ->Set(context, FIXED_ONE_BYTE_STRING(isolate, "kMaxLength"), @@ -1399,9 +1329,6 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(Swap32); registry->Register(Swap64); - registry->Register(EncodeInto); - registry->Register(EncodeUtf8String); - registry->Register(StringSlice); registry->Register(StringSlice); registry->Register(StringSlice); diff --git a/src/node_encoding.cc b/src/node_encoding.cc new file mode 100644 index 00000000000000..bb87d822d0f8e7 --- /dev/null +++ b/src/node_encoding.cc @@ -0,0 +1,150 @@ +#include "env-inl.h" +#include "node.h" +#include "node_external_reference.h" +#include "node_internals.h" +#include "util-inl.h" +#include "v8-fast-api-calls.h" +#include "v8.h" + +#if defined(NODE_HAVE_I18N_SUPPORT) +#include +#endif // NODE_HAVE_I18N_SUPPORT + +namespace node { + +using v8::ArrayBuffer; +using v8::BackingStore; +using v8::CFunction; +using v8::Context; +using v8::FastApiTypedArray; +using v8::FastOneByteString; +using v8::FunctionCallbackInfo; +using v8::Isolate; +using v8::Local; +using v8::Object; +using v8::String; +using v8::Uint32Array; +using v8::Uint8Array; +using v8::Value; + +namespace encoding_methods { + +static void EncodeUtf8(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + Isolate* isolate = env->isolate(); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); + + Local str = args[0].As(); + size_t length = str->Utf8Length(isolate); + + Local ab; + { + NoArrayBufferZeroFillScope no_zero_fill_scope(env->isolate_data()); + std::unique_ptr bs = + ArrayBuffer::NewBackingStore(isolate, length); + + CHECK(bs); + + str->WriteUtf8(isolate, + static_cast(bs->Data()), + -1, // We are certain that `data` is sufficiently large + nullptr, + String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8); + + ab = ArrayBuffer::New(isolate, std::move(bs)); + } + + 
args.GetReturnValue().Set(Uint8Array::New(ab, 0, length)); +} + +static void EncodeIntoUtf8(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + Isolate* isolate = env->isolate(); + CHECK_GE(args.Length(), 3); + CHECK(args[0]->IsString()); + CHECK(args[1]->IsUint8Array()); + CHECK(args[2]->IsUint32Array()); + + Local source = args[0].As(); + + Local dest = args[1].As(); + Local buf = dest->Buffer(); + char* write_result = static_cast(buf->Data()) + dest->ByteOffset(); + size_t dest_length = dest->ByteLength(); + + // results = [ read, written ] + Local result_arr = args[2].As(); + uint32_t* results = reinterpret_cast( + static_cast(result_arr->Buffer()->Data()) + + result_arr->ByteOffset()); + + int nchars; + int written = source->WriteUtf8( + isolate, + write_result, + dest_length, + &nchars, + String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8); + results[0] = nchars; + results[1] = written; +} + +#if defined(NODE_HAVE_I18N_SUPPORT) +static void FastEncodeIntoUtf8(Local receiver, + const FastOneByteString& source, + const FastApiTypedArray& destination, + const FastApiTypedArray& result) { + uint8_t* destination_data; + CHECK(destination.getStorageIfAligned(&destination_data)); + + uint32_t* results; + CHECK(result.getStorageIfAligned(&results)); + + size_t source_length = source.length; + size_t destination_length = destination.length(); + size_t min_length = std::min(source_length, destination_length); + + memcpy(destination_data, source.data, min_length); + + results[0] = min_length; + results[1] = min_length; +} + +CFunction fast_encode_into_utf8_(CFunction::Make(FastEncodeIntoUtf8)); +#endif // NODE_HAVE_I18N_SUPPORT + +static void Initialize(Local target, + Local unused, + Local context, + void* priv) { + SetMethodNoSideEffect(context, target, "encodeUtf8", EncodeUtf8); +#if defined(NODE_HAVE_I18N_SUPPORT) + SetFastMethod(context, + target, + "encodeIntoUtf8", + EncodeIntoUtf8, + &fast_encode_into_utf8_); +#else 
+ SetMethodNoSideEffect(context, target, "encodeIntoUtf8", EncodeIntoUtf8); +#endif // NODE_HAVE_I18N_SUPPORT +} + +void RegisterExternalReferences(ExternalReferenceRegistry* registry) { + registry->Register(EncodeUtf8); + + registry->Register(EncodeIntoUtf8); + +#if defined(NODE_HAVE_I18N_SUPPORT) + registry->Register(FastEncodeIntoUtf8); + registry->Register(fast_encode_into_utf8_.GetTypeInfo()); +#endif // NODE_HAVE_I18N_SUPPORT +} + +} // namespace encoding_methods +} // namespace node + +NODE_BINDING_CONTEXT_AWARE_INTERNAL(encoding_methods, + node::encoding_methods::Initialize) +NODE_BINDING_EXTERNAL_REFERENCE( + encoding_methods, node::encoding_methods::RegisterExternalReferences) diff --git a/src/node_external_reference.h b/src/node_external_reference.h index bf4b49670de310..44c395e20bee92 100644 --- a/src/node_external_reference.h +++ b/src/node_external_reference.h @@ -10,7 +10,13 @@ namespace node { +// TODO(anonrig): Find a good way of reusing existing types for fast api usages. using CFunctionCallback = void (*)(v8::Local receiver); +using CFunctionCallbackWithInput = + void (*)(v8::Local receiver, + const v8::FastOneByteString& source, + const v8::FastApiTypedArray& destination, + const v8::FastApiTypedArray& result); // This class manages the external references from the V8 heap // to the C++ addresses in Node.js. 
@@ -20,6 +26,7 @@ class ExternalReferenceRegistry { #define ALLOWED_EXTERNAL_REFERENCE_TYPES(V) \ V(CFunctionCallback) \ + V(CFunctionCallbackWithInput) \ V(const v8::CFunctionInfo*) \ V(v8::FunctionCallback) \ V(v8::AccessorGetterCallback) \ @@ -67,6 +74,7 @@ class ExternalReferenceRegistry { V(credentials) \ V(env_var) \ V(errors) \ + V(encoding_methods) \ V(fs) \ V(fs_dir) \ V(fs_event_wrap) \ diff --git a/test/parallel/test-bootstrap-modules.js b/test/parallel/test-bootstrap-modules.js index 53da8b1af1492d..f5117936d065d6 100644 --- a/test/parallel/test-bootstrap-modules.js +++ b/test/parallel/test-bootstrap-modules.js @@ -18,6 +18,7 @@ const expectedModules = new Set([ 'Internal Binding contextify', 'Internal Binding credentials', 'Internal Binding errors', + 'Internal Binding encoding_methods', 'Internal Binding fs_dir', 'Internal Binding fs_event_wrap', 'Internal Binding fs', diff --git a/test/parallel/test-util-text-encoder.js b/test/parallel/test-util-text-encoder.js new file mode 100644 index 00000000000000..a55544df7385d4 --- /dev/null +++ b/test/parallel/test-util-text-encoder.js @@ -0,0 +1,50 @@ +'use strict'; + +require('../common'); +const assert = require('assert'); +const { TextEncoder, TextDecoder } = require('util'); + +const encoder = new TextEncoder(); +const decoder = new TextDecoder(); + +assert.strictEqual(decoder.decode(encoder.encode('')), ''); +assert.strictEqual(decoder.decode(encoder.encode('latin1')), 'latin1'); +assert.strictEqual(decoder.decode(encoder.encode('Yağız')), 'Yağız'); + +// For loop is required to trigger the fast path for encodeInto +// Since v8 fast path is only triggered when v8 optimization starts. 
+for (let i = 0; i < 1e4; i++) { + { + const dest = new Uint8Array(10).fill(0); + const encoded = encoder.encodeInto('Yağız', dest); + assert.strictEqual(encoded.read, 5); + assert.strictEqual(encoded.written, 7); + assert.strictEqual(decoder.decode(dest), 'Yağız\x00\x00\x00'); + } + + { + const dest = new Uint8Array(1024).fill(0); + const encoded = encoder.encodeInto('latin', dest); + assert.strictEqual(encoded.read, 5); + assert.strictEqual(encoded.written, 5); + assert.strictEqual(decoder.decode(dest.slice(0, 5)), 'latin'); + } + + { + const input = 'latin'; + const dest = new Uint8Array(10).fill(0); + const encoded = encoder.encodeInto(input, dest); + assert.strictEqual(encoded.read, input.length); + assert.strictEqual(encoded.written, input.length); + assert.strictEqual(decoder.decode(dest.slice(0, 5)), 'latin'); + } + + { + const input = 'latin'; + const dest = new Uint8Array(1).fill(0); + const encoded = encoder.encodeInto(input, dest); + assert.strictEqual(encoded.read, 1); + assert.strictEqual(encoded.written, 1); + assert.strictEqual(decoder.decode(dest), 'l'); + } +} From 815fbeef451468fd4eb57a634c09523eb0bf9aea Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Mon, 5 Dec 2022 17:09:35 -0500 Subject: [PATCH 3/3] src: move decodeUtf8 to node_encoding --- lib/internal/encoding.js | 7 ++---- src/node_buffer.cc | 46 ------------------------------------ src/node_encoding.cc | 50 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 52 deletions(-) diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index 1205cf3ba8e5b9..050166c0eca869 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -51,10 +51,7 @@ const { } = require('internal/validators'); const { - decodeUTF8, -} = internalBinding('buffer'); - -const { + decodeUtf8, encodeUtf8, encodeIntoUtf8, } = internalBinding('encoding_methods'); @@ -433,7 +430,7 @@ function makeTextDecoderICU() { this[kUTF8FastPath] &&= !(options?.stream); if 
(this[kUTF8FastPath]) { - return decodeUTF8(input, this[kIgnoreBOM]); + return decodeUtf8(input, this[kIgnoreBOM]); } this.#prepareConverter(); diff --git a/src/node_buffer.cc b/src/node_buffer.cc index aec33ca6a59555..e77557e51118a7 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -566,50 +566,6 @@ void StringSlice(const FunctionCallbackInfo& args) { args.GetReturnValue().Set(ret); } -// Convert the input into an encoded string -void DecodeUTF8(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); // list, flags - - CHECK_GE(args.Length(), 1); - - if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() || - args[0]->IsArrayBufferView())) { - return node::THROW_ERR_INVALID_ARG_TYPE( - env->isolate(), - "The \"list\" argument must be an instance of SharedArrayBuffer, " - "ArrayBuffer or ArrayBufferView."); - } - - ArrayBufferViewContents buffer(args[0]); - - bool ignore_bom = args[1]->IsTrue(); - - const char* data = buffer.data(); - size_t length = buffer.length(); - - if (!ignore_bom && length >= 3) { - if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) { - data += 3; - length -= 3; - } - } - - if (length == 0) return args.GetReturnValue().SetEmptyString(); - - Local error; - MaybeLocal maybe_ret = - StringBytes::Encode(env->isolate(), data, length, UTF8, &error); - Local ret; - - if (!maybe_ret.ToLocal(&ret)) { - CHECK(!error.IsEmpty()); - env->isolate()->ThrowException(error); - return; - } - - args.GetReturnValue().Set(ret); -} - // bytesCopied = copy(buffer, target[, targetStart][, sourceStart][, sourceEnd]) void Copy(const FunctionCallbackInfo &args) { Environment* env = Environment::GetCurrent(args); @@ -1259,7 +1215,6 @@ void Initialize(Local target, SetMethod(context, target, "setBufferPrototype", SetBufferPrototype); SetMethodNoSideEffect(context, target, "createFromString", CreateFromString); - SetMethodNoSideEffect(context, target, "decodeUTF8", DecodeUTF8); SetMethodNoSideEffect(context, target,
"byteLengthUtf8", ByteLengthUtf8); SetMethod(context, target, "copy", Copy); @@ -1314,7 +1269,6 @@ void Initialize(Local target, void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(SetBufferPrototype); registry->Register(CreateFromString); - registry->Register(DecodeUTF8); registry->Register(ByteLengthUtf8); registry->Register(Copy); diff --git a/src/node_encoding.cc b/src/node_encoding.cc index bb87d822d0f8e7..902ba5dfa1686d 100644 --- a/src/node_encoding.cc +++ b/src/node_encoding.cc @@ -1,7 +1,9 @@ #include "env-inl.h" #include "node.h" +#include "node_errors.h" #include "node_external_reference.h" #include "node_internals.h" +#include "string_bytes.h" #include "util-inl.h" #include "v8-fast-api-calls.h" #include "v8.h" @@ -21,6 +23,7 @@ using v8::FastOneByteString; using v8::FunctionCallbackInfo; using v8::Isolate; using v8::Local; +using v8::MaybeLocal; using v8::Object; using v8::String; using v8::Uint32Array; @@ -114,6 +117,49 @@ static void FastEncodeIntoUtf8(Local receiver, CFunction fast_encode_into_utf8_(CFunction::Make(FastEncodeIntoUtf8)); #endif // NODE_HAVE_I18N_SUPPORT +void DecodeUtf8(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); // list, flags + + CHECK_GE(args.Length(), 1); + + if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() || + args[0]->IsArrayBufferView())) { + return node::THROW_ERR_INVALID_ARG_TYPE( + env->isolate(), + "The \"list\" argument must be an instance of SharedArrayBuffer, " + "ArrayBuffer or ArrayBufferView."); + } + + ArrayBufferViewContents buffer(args[0]); + + bool ignore_bom = args[1]->IsTrue(); + + const char* data = buffer.data(); + size_t length = buffer.length(); + + if (!ignore_bom && length >= 3) { + if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) { + data += 3; + length -= 3; + } + } + + if (length == 0) return args.GetReturnValue().SetEmptyString(); + + Local error; + MaybeLocal maybe_ret = + StringBytes::Encode(env->isolate(), data, length, UTF8, &error); + Local ret; + + if (!maybe_ret.ToLocal(&ret)) {
+ CHECK(!error.IsEmpty()); + env->isolate()->ThrowException(error); + return; + } + + args.GetReturnValue().Set(ret); +} + static void Initialize(Local target, Local unused, Local context, @@ -128,17 +174,19 @@ static void Initialize(Local target, #else SetMethodNoSideEffect(context, target, "encodeIntoUtf8", EncodeIntoUtf8); #endif // NODE_HAVE_I18N_SUPPORT + SetMethodNoSideEffect(context, target, "decodeUtf8", DecodeUtf8); } void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(EncodeUtf8); registry->Register(EncodeIntoUtf8); - #if defined(NODE_HAVE_I18N_SUPPORT) registry->Register(FastEncodeIntoUtf8); registry->Register(fast_encode_into_utf8_.GetTypeInfo()); #endif // NODE_HAVE_I18N_SUPPORT + + registry->Register(DecodeUtf8); } } // namespace encoding_methods