From 1f41306efe5757cc52aff85598a690c2f0e628ea Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli
Date: Fri, 2 Dec 2022 10:10:20 -0500
Subject: [PATCH] src: add encoding_methods with fast api

---
 benchmark/util/text-encoder.js          |  20 ++-
 lib/internal/bootstrap/loaders.js       |   1 +
 lib/internal/encoding.js                |  18 ++-
 node.gyp                                |   1 +
 src/node_binding.cc                     |   1 +
 src/node_buffer.cc                      |  73 -----------
 src/node_encoding.cc                    | 164 ++++++++++++++++++++++++
 src/node_external_reference.h           |   9 ++
 test/parallel/test-bootstrap-modules.js |   1 +
 test/parallel/test-util-text-encoder.js |  48 +++++++
 10 files changed, 254 insertions(+), 82 deletions(-)
 create mode 100644 src/node_encoding.cc
 create mode 100644 test/parallel/test-util-text-encoder.js

diff --git a/benchmark/util/text-encoder.js b/benchmark/util/text-encoder.js
index ca3cb827779be3..e3c900615a40a0 100644
--- a/benchmark/util/text-encoder.js
+++ b/benchmark/util/text-encoder.js
@@ -2,17 +2,27 @@
 
 const common = require('../common.js');
 
-const BASE = 'string\ud801';
-
 const bench = common.createBenchmark(main, {
-  len: [256, 1024, 1024 * 32],
+  len: [0, 256, 1024, 1024 * 32],
   n: [1e4],
+  type: ['v8-one-byte-string', 'v8-two-byte-string'],
   op: ['encode', 'encodeInto']
 });
 
-function main({ n, op, len }) {
+function main({ n, op, len, type }) {
   const encoder = new TextEncoder();
-  const input = BASE.repeat(len);
+  let base = '';
+
+  switch (type) {
+    case 'v8-one-byte-string':
+      base = 'a';
+      break;
+    case 'v8-two-byte-string':
+      base = 'ğ';
+      break;
+  }
+
+  const input = base.repeat(len);
   const subarray = new Uint8Array(len);
 
   bench.start();
diff --git a/lib/internal/bootstrap/loaders.js b/lib/internal/bootstrap/loaders.js
index f25fb7c9f44391..1782511fecf6d8 100644
--- a/lib/internal/bootstrap/loaders.js
+++ b/lib/internal/bootstrap/loaders.js
@@ -85,6 +85,7 @@ const internalBindingAllowlist = new SafeSet([
   'constants',
   'contextify',
   'crypto',
+  'encoding_methods',
   'fs',
   'fs_event_wrap',
   'http_parser',
diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js
index 0e3c44d2e84fbb..5a18971b9d4ee9 100644
--- a/lib/internal/encoding.js
+++ b/lib/internal/encoding.js
@@ -51,11 +51,14 @@ const {
 } = require('internal/validators');
 
 const {
-  encodeInto,
-  encodeUtf8String,
   decodeUTF8,
 } = internalBinding('buffer');
 
+const {
+  encodeUtf8,
+  encodeIntoUtf8,
+} = internalBinding('encoding_methods');
+
 let Buffer;
 function lazyBuffer() {
   if (Buffer === undefined)
@@ -337,7 +340,11 @@ class TextEncoder {
 
   encode(input = '') {
     validateEncoder(this);
-    return encodeUtf8String(`${input}`);
+    input = `${input}`;
+    if (input.length > 0) {
+      return encodeUtf8(input);
+    }
+    return new Uint8Array([]);
   }
 
   encodeInto(src, dest) {
@@ -345,7 +352,10 @@ class TextEncoder {
     validateString(src, 'src');
     if (!dest || !isUint8Array(dest))
       throw new ERR_INVALID_ARG_TYPE('dest', 'Uint8Array', dest);
-    encodeInto(src, dest, encodeIntoResults);
+    if (src.length === 0) {
+      return { read: 0, written: 0 };
+    }
+    encodeIntoUtf8(src, dest, encodeIntoResults);
     return { read: encodeIntoResults[0], written: encodeIntoResults[1] };
   }
 
diff --git a/node.gyp b/node.gyp
index 448cb8a8c7cd49..0b899907789ef0 100644
--- a/node.gyp
+++ b/node.gyp
@@ -498,6 +498,7 @@
         'src/node_contextify.cc',
         'src/node_credentials.cc',
         'src/node_dir.cc',
+        'src/node_encoding.cc',
         'src/node_env_var.cc',
         'src/node_errors.cc',
         'src/node_external_reference.cc',
diff --git a/src/node_binding.cc b/src/node_binding.cc
index ab25501dcbae96..463acba163e1e4 100644
--- a/src/node_binding.cc
+++ b/src/node_binding.cc
@@ -43,6 +43,7 @@
   V(contextify)                                                                \
   V(credentials)                                                               \
   V(errors)                                                                    \
+  V(encoding_methods)                                                          \
   V(fs)                                                                        \
   V(fs_dir)                                                                    \
   V(fs_event_wrap)                                                             \
diff --git a/src/node_buffer.cc b/src/node_buffer.cc
index 0f3b048272fbe7..aec33ca6a59555 100644
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@@ -1146,73 +1146,6 @@ void Swap64(const FunctionCallbackInfo<Value>& args) {
   args.GetReturnValue().Set(args[0]);
 }
 
-
-// Encode a single string to a UTF-8 Uint8Array (not Buffer).
-// Used in TextEncoder.prototype.encode.
-static void EncodeUtf8String(const FunctionCallbackInfo<Value>& args) {
-  Environment* env = Environment::GetCurrent(args);
-  Isolate* isolate = env->isolate();
-  CHECK_GE(args.Length(), 1);
-  CHECK(args[0]->IsString());
-
-  Local<String> str = args[0].As<String>();
-  size_t length = str->Utf8Length(isolate);
-
-  Local<ArrayBuffer> ab;
-  {
-    NoArrayBufferZeroFillScope no_zero_fill_scope(env->isolate_data());
-    std::unique_ptr<BackingStore> bs =
-        ArrayBuffer::NewBackingStore(isolate, length);
-
-    CHECK(bs);
-
-    str->WriteUtf8(isolate,
-                   static_cast<char*>(bs->Data()),
-                   -1,  // We are certain that `data` is sufficiently large
-                   nullptr,
-                   String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8);
-
-    ab = ArrayBuffer::New(isolate, std::move(bs));
-  }
-
-  auto array = Uint8Array::New(ab, 0, length);
-  args.GetReturnValue().Set(array);
-}
-
-
-static void EncodeInto(const FunctionCallbackInfo<Value>& args) {
-  Environment* env = Environment::GetCurrent(args);
-  Isolate* isolate = env->isolate();
-  CHECK_GE(args.Length(), 3);
-  CHECK(args[0]->IsString());
-  CHECK(args[1]->IsUint8Array());
-  CHECK(args[2]->IsUint32Array());
-
-  Local<String> source = args[0].As<String>();
-
-  Local<Uint8Array> dest = args[1].As<Uint8Array>();
-  Local<ArrayBuffer> buf = dest->Buffer();
-  char* write_result = static_cast<char*>(buf->Data()) + dest->ByteOffset();
-  size_t dest_length = dest->ByteLength();
-
-  // results = [ read, written ]
-  Local<Uint32Array> result_arr = args[2].As<Uint32Array>();
-  uint32_t* results = reinterpret_cast<uint32_t*>(
-      static_cast<char*>(result_arr->Buffer()->Data()) +
-      result_arr->ByteOffset());
-
-  int nchars;
-  int written = source->WriteUtf8(
-      isolate,
-      write_result,
-      dest_length,
-      &nchars,
-      String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8);
-  results[0] = nchars;
-  results[1] = written;
-}
-
-
 void SetBufferPrototype(const FunctionCallbackInfo<Value>& args) {
   Environment* env = Environment::GetCurrent(args);
 
@@ -1344,9 +1277,6 @@ void Initialize(Local<Object> target,
   SetMethod(context, target, "swap32", Swap32);
   SetMethod(context, target, "swap64", Swap64);
 
-  SetMethod(context, target, "encodeInto", EncodeInto);
-  SetMethodNoSideEffect(context, target, "encodeUtf8String", EncodeUtf8String);
-
   target
       ->Set(context,
             FIXED_ONE_BYTE_STRING(isolate, "kMaxLength"),
@@ -1399,9 +1329,6 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
   registry->Register(Swap32);
   registry->Register(Swap64);
 
-  registry->Register(EncodeInto);
-  registry->Register(EncodeUtf8String);
-
   registry->Register(StringSlice<ASCII>);
   registry->Register(StringSlice<BASE64>);
   registry->Register(StringSlice<BASE64URL>);
diff --git a/src/node_encoding.cc b/src/node_encoding.cc
new file mode 100644
index 00000000000000..b2e980fe00bc3f
--- /dev/null
+++ b/src/node_encoding.cc
@@ -0,0 +1,164 @@
+#include "env-inl.h"
+#include "node.h"
+#include "node_external_reference.h"
+#include "node_internals.h"
+#include "util-inl.h"
+#include "v8-fast-api-calls.h"
+#include "v8.h"
+
+namespace node {
+
+using v8::ArrayBuffer;
+using v8::BackingStore;
+using v8::CFunction;
+using v8::Context;
+using v8::FastApiCallbackOptions;
+using v8::FastApiTypedArray;
+using v8::FastOneByteString;
+using v8::FunctionCallbackInfo;
+using v8::Isolate;
+using v8::Local;
+using v8::Object;
+using v8::String;
+using v8::Uint32Array;
+using v8::Uint8Array;
+using v8::Value;
+
+namespace encoding_methods {
+
+// Encode a single string to a UTF-8 Uint8Array (not Buffer).
+// Used in TextEncoder.prototype.encode.
+static void EncodeUtf8(const FunctionCallbackInfo<Value>& args) {
+  Environment* env = Environment::GetCurrent(args);
+  Isolate* isolate = env->isolate();
+  CHECK_GE(args.Length(), 1);
+  CHECK(args[0]->IsString());
+
+  Local<String> str = args[0].As<String>();
+  size_t length = str->Utf8Length(isolate);
+
+  Local<ArrayBuffer> ab;
+  {
+    NoArrayBufferZeroFillScope no_zero_fill_scope(env->isolate_data());
+    std::unique_ptr<BackingStore> bs =
+        ArrayBuffer::NewBackingStore(isolate, length);
+
+    CHECK(bs);
+
+    str->WriteUtf8(isolate,
+                   static_cast<char*>(bs->Data()),
+                   -1,  // We are certain that `data` is sufficiently large
+                   nullptr,
+                   String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8);
+
+    ab = ArrayBuffer::New(isolate, std::move(bs));
+  }
+
+  args.GetReturnValue().Set(Uint8Array::New(ab, 0, length));
+}
+
+// Encode a string as UTF-8 into an existing Uint8Array.
+// Used in TextEncoder.prototype.encodeInto; args are (source, dest, results)
+// and `results` (a Uint32Array) receives [ read, written ].
+static void EncodeIntoUtf8(const FunctionCallbackInfo<Value>& args) {
+  Environment* env = Environment::GetCurrent(args);
+  Isolate* isolate = env->isolate();
+  CHECK_GE(args.Length(), 3);
+  CHECK(args[0]->IsString());
+  CHECK(args[1]->IsUint8Array());
+  CHECK(args[2]->IsUint32Array());
+
+  Local<String> source = args[0].As<String>();
+
+  Local<Uint8Array> dest = args[1].As<Uint8Array>();
+  Local<ArrayBuffer> buf = dest->Buffer();
+  char* write_result = static_cast<char*>(buf->Data()) + dest->ByteOffset();
+  size_t dest_length = dest->ByteLength();
+
+  // results = [ read, written ]
+  Local<Uint32Array> result_arr = args[2].As<Uint32Array>();
+  uint32_t* results = reinterpret_cast<uint32_t*>(
+      static_cast<char*>(result_arr->Buffer()->Data()) +
+      result_arr->ByteOffset());
+
+  int nchars;
+  int written = source->WriteUtf8(
+      isolate,
+      write_result,
+      dest_length,
+      &nchars,
+      String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8);
+  results[0] = nchars;
+  results[1] = written;
+}
+
+// Fast API counterpart of EncodeIntoUtf8 for one-byte (Latin-1) sources.
+// Each code unit becomes one UTF-8 byte (< 0x80) or two (>= 0x80); encoding
+// stops at the first code unit that does not fit completely in `destination`,
+// so `read` and `written` describe exactly what was stored.
+static void FastEncodeIntoUtf8(
+    Local<Value> receiver,
+    const FastOneByteString& source,
+    const FastApiTypedArray<uint8_t>& destination,
+    const FastApiTypedArray<uint32_t>& result,
+    FastApiCallbackOptions& options) {  // NOLINT(runtime/references)
+  uint8_t* destination_data;
+  uint32_t* results;
+  // Unaligned storage cannot be written directly; request the regular
+  // EncodeIntoUtf8 callback instead of aborting the process.
+  if (!destination.getStorageIfAligned(&destination_data) ||
+      !result.getStorageIfAligned(&results)) {
+    options.fallback = true;
+    return;
+  }
+
+  const size_t capacity = destination.length();
+  uint32_t read = 0;
+  size_t written = 0;
+  for (; read < source.length; read++) {
+    const uint8_t unit = static_cast<uint8_t>(source.data[read]);
+    if (unit < 0x80) {
+      if (written == capacity) break;
+      destination_data[written++] = unit;
+    } else {
+      // U+0080..U+00FF encode as 110000xx 10xxxxxx.
+      if (capacity - written < 2) break;
+      destination_data[written++] = static_cast<uint8_t>(0xC0 | (unit >> 6));
+      destination_data[written++] = static_cast<uint8_t>(0x80 | (unit & 0x3F));
+    }
+  }
+
+  // results = [ read, written ]
+  results[0] = read;
+  results[1] = static_cast<uint32_t>(written);
+}
+
+CFunction fast_encode_into_utf8_(CFunction::Make(FastEncodeIntoUtf8));
+
+static void Initialize(Local<Object> target,
+                       Local<Value> unused,
+                       Local<Context> context,
+                       void* priv) {
+  SetMethodNoSideEffect(context, target, "encodeUtf8", EncodeUtf8);
+  SetFastMethod(context,
+                target,
+                "encodeIntoUtf8",
+                EncodeIntoUtf8,
+                &fast_encode_into_utf8_);
+}
+
+void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
+  registry->Register(EncodeUtf8);
+
+  registry->Register(EncodeIntoUtf8);
+  registry->Register(FastEncodeIntoUtf8);
+  registry->Register(fast_encode_into_utf8_.GetTypeInfo());
+}
+
+}  // namespace encoding_methods
+}  // namespace node
+
+NODE_BINDING_CONTEXT_AWARE_INTERNAL(encoding_methods,
+                                    node::encoding_methods::Initialize)
+NODE_BINDING_EXTERNAL_REFERENCE(
+    encoding_methods, node::encoding_methods::RegisterExternalReferences)
diff --git a/src/node_external_reference.h b/src/node_external_reference.h
index bf4b49670de310..3e03b2b4672112 100644
--- a/src/node_external_reference.h
+++ b/src/node_external_reference.h
@@ -10,7 +10,14 @@
 
 namespace node {
 
+// TODO(anonrig): Find a good way of reusing existing types for fast api usages.
 using CFunctionCallback = void (*)(v8::Local<v8::Value> receiver);
+using CFunctionCallbackWithInput = void (*)(
+    v8::Local<v8::Value> receiver,
+    const v8::FastOneByteString& source,
+    const v8::FastApiTypedArray<uint8_t>& destination,
+    const v8::FastApiTypedArray<uint32_t>& result,
+    v8::FastApiCallbackOptions& options);  // NOLINT(runtime/references)
 
 // This class manages the external references from the V8 heap
 // to the C++ addresses in Node.js.
@@ -20,6 +27,7 @@ class ExternalReferenceRegistry {
 
 #define ALLOWED_EXTERNAL_REFERENCE_TYPES(V)                                    \
   V(CFunctionCallback)                                                         \
+  V(CFunctionCallbackWithInput)                                                \
   V(const v8::CFunctionInfo*)                                                  \
   V(v8::FunctionCallback)                                                      \
   V(v8::AccessorGetterCallback)                                                \
@@ -67,6 +75,7 @@ class ExternalReferenceRegistry {
   V(credentials)                                                               \
   V(env_var)                                                                   \
   V(errors)                                                                    \
+  V(encoding_methods)                                                          \
   V(fs)                                                                        \
   V(fs_dir)                                                                    \
   V(fs_event_wrap)                                                             \
diff --git a/test/parallel/test-bootstrap-modules.js b/test/parallel/test-bootstrap-modules.js
index 53da8b1af1492d..f5117936d065d6 100644
--- a/test/parallel/test-bootstrap-modules.js
+++ b/test/parallel/test-bootstrap-modules.js
@@ -18,6 +18,7 @@ const expectedModules = new Set([
   'Internal Binding contextify',
   'Internal Binding credentials',
   'Internal Binding errors',
+  'Internal Binding encoding_methods',
   'Internal Binding fs_dir',
   'Internal Binding fs_event_wrap',
   'Internal Binding fs',
diff --git a/test/parallel/test-util-text-encoder.js b/test/parallel/test-util-text-encoder.js
new file mode 100644
index 00000000000000..bd0c57dd183d0b
--- /dev/null
+++ b/test/parallel/test-util-text-encoder.js
@@ -0,0 +1,48 @@
+'use strict';
+
+require('../common');
+const assert = require('assert');
+const { TextEncoder, TextDecoder } = require('util');
+
+const encoder = new TextEncoder();
+const decoder = new TextDecoder();
+
+assert.strictEqual(decoder.decode(encoder.encode('')), '');
+assert.strictEqual(decoder.decode(encoder.encode('latin1')), 'latin1');
+assert.strictEqual(decoder.decode(encoder.encode('Yağız')), 'Yağız');
+
+{
+  const dest = new Uint8Array(5).fill(0);
+  const encoded = encoder.encodeInto('latin', dest);
+  assert.strictEqual(encoded.read, dest.length);
+  assert.strictEqual(encoded.written, dest.length);
+  assert.strictEqual(decoder.decode(dest), 'latin');
+}
+
+{
+  const input = 'latin';
+  const dest = new Uint8Array(10).fill(0);
+  const encoded = encoder.encodeInto(input, dest);
+  assert.strictEqual(encoded.read, input.length);
+  assert.strictEqual(encoded.written, input.length);
+  assert.strictEqual(decoder.decode(dest.slice(0, 5)), 'latin');
+}
+
+{
+  const input = 'latin';
+  const dest = new Uint8Array(1).fill(0);
+  const encoded = encoder.encodeInto(input, dest);
+  assert.strictEqual(encoded.read, 1);
+  assert.strictEqual(encoded.written, 1);
+  assert.strictEqual(decoder.decode(dest), 'l');
+}
+
+{
+  // 'é' (U+00E9) is a single Latin-1 code unit but two UTF-8 bytes, so only
+  // one of the two characters fits into three bytes.
+  const dest = new Uint8Array(3).fill(0);
+  const encoded = encoder.encodeInto('éé', dest);
+  assert.strictEqual(encoded.read, 1);
+  assert.strictEqual(encoded.written, 2);
+  assert.deepStrictEqual([...dest], [0xc3, 0xa9, 0]);
+}