From 87039c83544023c9a921d23c2625dc2300b56e4e Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli
Date: Thu, 10 Nov 2022 14:32:48 -0500
Subject: [PATCH] util: add fast path for utf8 encoding

Co-authored-by: Anna Henningsen
---
 lib/internal/encoding.js                      | 35 ++++++++++++---
 src/node_buffer.cc                            | 45 +++++++++++++++++++
 ...test-whatwg-encoding-custom-textdecoder.js |  2 +-
 3 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js
index 66dc0ce9c8609a..0e3c44d2e84fbb 100644
--- a/lib/internal/encoding.js
+++ b/lib/internal/encoding.js
@@ -4,6 +4,7 @@
 // https://encoding.spec.whatwg.org
 
 const {
+  Boolean,
   ObjectCreate,
   ObjectDefineProperties,
   ObjectGetOwnPropertyDescriptors,
@@ -28,6 +29,8 @@ const kFlags = Symbol('flags');
 const kEncoding = Symbol('encoding');
 const kDecoder = Symbol('decoder');
 const kEncoder = Symbol('encoder');
+const kUTF8FastPath = Symbol('kUTF8FastPath');
+const kIgnoreBOM = Symbol('kIgnoreBOM');
 
 const {
   getConstructorOf,
@@ -49,7 +52,8 @@ const {
 
 const {
   encodeInto,
-  encodeUtf8String
+  encodeUtf8String,
+  decodeUTF8,
 } = internalBinding('buffer');
 
 let Buffer;
@@ -397,19 +401,40 @@ function makeTextDecoderICU() {
         flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
       }
 
-      const handle = getConverter(enc, flags);
-      if (handle === undefined)
-        throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
+      // Only support fast path for UTF-8 without FATAL flag
+      const fastPathAvailable = enc === 'utf-8' && !(options?.fatal);
 
       this[kDecoder] = true;
-      this[kHandle] = handle;
       this[kFlags] = flags;
       this[kEncoding] = enc;
+      this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
+      this[kUTF8FastPath] = fastPathAvailable;
+      this[kHandle] = undefined;
+
+      if (!fastPathAvailable) {
+        this.#prepareConverter();
+      }
     }
 
+    #prepareConverter() {
+      if (this[kHandle] !== undefined) return;
+      const handle = getConverter(this[kEncoding], this[kFlags]);
+      if (handle === undefined)
+        throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
+      this[kHandle] = handle;
+    }
 
     decode(input = empty, options = kEmptyObject) {
       validateDecoder(this);
+
+      this[kUTF8FastPath] &&= !(options?.stream);
+
+      if (this[kUTF8FastPath]) {
+        return decodeUTF8(input, this[kIgnoreBOM]);
+      }
+
+      this.#prepareConverter();
+
       validateObject(options, 'options', {
         nullable: true,
         allowArray: true,
diff --git a/src/node_buffer.cc b/src/node_buffer.cc
index eb8e541c68635d..acec3c420ce1d2 100644
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@@ -24,6 +24,7 @@
 #include "node_blob.h"
 #include "node_errors.h"
 #include "node_external_reference.h"
+#include "node_i18n.h"
 #include "node_internals.h"
 
 #include "env-inl.h"
@@ -565,6 +566,48 @@ void StringSlice(const FunctionCallbackInfo<Value>& args) {
   args.GetReturnValue().Set(ret);
 }
 
+// Decode UTF-8 bytes (args[0]) to a string; a true args[1] keeps a leading BOM.
+void DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
+  Environment* env = Environment::GetCurrent(args);  // args: input, ignoreBOM
+
+  if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
+        args[0]->IsArrayBufferView())) {
+    return node::THROW_ERR_INVALID_ARG_TYPE(
+        env->isolate(),
+        "The \"list\" argument must be an instance of SharedArrayBuffer, "
+        "ArrayBuffer or ArrayBufferView.");
+  }
+
+  ArrayBufferViewContents<char> buffer(args[0]);
+
+  CHECK(args[1]->IsBoolean());
+  bool ignore_bom = args[1]->IsTrue();
+
+  const char* data = buffer.data();
+  size_t length = buffer.length();
+
+  if (!ignore_bom && length >= 3) {
+    if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) {
+      data += 3;
+      length -= 3;
+    }
+  }
+
+  if (length == 0) return args.GetReturnValue().SetEmptyString();
+
+  Local<Value> error;
+  MaybeLocal<Value> maybe_ret =
+      StringBytes::Encode(env->isolate(), data, length, UTF8, &error);
+  Local<Value> ret;
+
+  if (!maybe_ret.ToLocal(&ret)) {
+    CHECK(!error.IsEmpty());
+    env->isolate()->ThrowException(error);
+    return;
+  }
+
+  args.GetReturnValue().Set(ret);
+}
 
 // bytesCopied = copy(buffer, target[, targetStart][, sourceStart][, sourceEnd])
 void Copy(const FunctionCallbackInfo<Value> &args) {
@@ -1282,6 +1325,7 @@ void Initialize(Local<Object> target,
 
   SetMethod(context, target, "setBufferPrototype", SetBufferPrototype);
   SetMethodNoSideEffect(context, target, "createFromString", CreateFromString);
+  SetMethodNoSideEffect(context, target, "decodeUTF8", DecodeUTF8);
 
   SetMethodNoSideEffect(context, target, "byteLengthUtf8", ByteLengthUtf8);
   SetMethod(context, target, "copy", Copy);
@@ -1339,6 +1383,7 @@ void Initialize(Local<Object> target,
 void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
   registry->Register(SetBufferPrototype);
   registry->Register(CreateFromString);
+  registry->Register(DecodeUTF8);
 
   registry->Register(ByteLengthUtf8);
   registry->Register(Copy);
diff --git a/test/parallel/test-whatwg-encoding-custom-textdecoder.js b/test/parallel/test-whatwg-encoding-custom-textdecoder.js
index 75a2a4735cd6ef..a48d0993fc7a92 100644
--- a/test/parallel/test-whatwg-encoding-custom-textdecoder.js
+++ b/test/parallel/test-whatwg-encoding-custom-textdecoder.js
@@ -113,7 +113,7 @@ if (common.hasIntl) {
       '  fatal: false,\n' +
       '  ignoreBOM: true,\n' +
       '  [Symbol(flags)]: 4,\n' +
-      '  [Symbol(handle)]: Converter {}\n' +
+      '  [Symbol(handle)]: undefined\n' +
       '}'
     );
   } else {