From 10910512583598950727811ff5c3f8ec546bf717 Mon Sep 17 00:00:00 2001 From: Robert Nagy Date: Sat, 10 Aug 2024 20:49:11 +0200 Subject: [PATCH] buffer: optimize writing short strings PR-URL: https://github.com/nodejs/node/pull/54310 --- .../buffers/buffer-write-string-short.js | 22 + lib/buffer.js | 155 ++++-- src/node_buffer.cc | 450 +++++++----------- src/node_external_reference.h | 8 + 4 files changed, 311 insertions(+), 324 deletions(-) create mode 100644 benchmark/buffers/buffer-write-string-short.js diff --git a/benchmark/buffers/buffer-write-string-short.js b/benchmark/buffers/buffer-write-string-short.js new file mode 100644 index 00000000000000..152a6c1ede2abe --- /dev/null +++ b/benchmark/buffers/buffer-write-string-short.js @@ -0,0 +1,22 @@ +'use strict'; + +const common = require('../common.js'); +const bench = common.createBenchmark(main, { + encoding: [ + '', 'utf8', 'ascii', 'latin1', + ], + len: [0, 1, 8, 16, 32], + n: [1e6], +}); + +// Times `n` calls of buf.write() that write a `len`-character string of 'a' +// at offset 0 into a `len`-byte buffer, using the given encoding name. +function main({ len, n, encoding }) { + const buf = Buffer.allocUnsafe(len); + const string = Buffer.from('a'.repeat(len)).toString(); + bench.start(); + for (let i = 0; i < n; ++i) { + buf.write(string, 0, encoding); + } + bench.end(n); +} diff --git a/lib/buffer.js b/lib/buffer.js index 4e6031afdb3919..d5174285819282 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -23,13 +23,13 @@ const { Array, - ArrayBufferIsView, + ArrayFrom, ArrayIsArray, ArrayPrototypeForEach, + ArrayPrototypeIndexOf, MathFloor, MathMin, MathTrunc, - NumberIsInteger, NumberIsNaN, NumberMAX_SAFE_INTEGER, NumberMIN_SAFE_INTEGER, @@ -43,10 +43,10 @@ const { StringPrototypeTrim, SymbolSpecies, SymbolToPrimitive, - TypedArrayPrototypeFill, TypedArrayPrototypeGetBuffer, TypedArrayPrototypeGetByteLength, TypedArrayPrototypeGetByteOffset, + TypedArrayPrototypeFill, TypedArrayPrototypeGetLength, TypedArrayPrototypeSet, TypedArrayPrototypeSlice, @@ -58,7 +58,6 @@ const { byteLengthUtf8, compare: _compare, compareOffset, - copy: _copy, createFromString,
fill: bindingFill, isAscii: bindingIsAscii, @@ -71,9 +70,10 @@ const { swap64: _swap64, kMaxLength, kStringMaxLength, - atob: _atob, - btoa: _btoa, } = internalBinding('buffer'); + +const bufferBinding = internalBinding('buffer'); + const { constants: { ALL_PROPERTIES, @@ -88,7 +88,6 @@ const { normalizeEncoding, kIsEncodingSymbol, defineLazyProperties, - encodingsMap, } = require('internal/util'); const { isAnyArrayBuffer, @@ -99,6 +98,7 @@ const { const { inspect: utilInspect, } = require('internal/util/inspect'); +const { encodings } = internalBinding('string_decoder'); const { codes: { @@ -106,8 +106,8 @@ const { ERR_INVALID_ARG_TYPE, ERR_INVALID_ARG_VALUE, ERR_INVALID_BUFFER_SIZE, - ERR_MISSING_ARGS, ERR_OUT_OF_RANGE, + ERR_MISSING_ARGS, ERR_UNKNOWN_ENCODING, }, genericNodeError, @@ -152,6 +152,10 @@ const constants = ObjectDefineProperties({}, { Buffer.poolSize = 8 * 1024; let poolSize, poolOffset, allocPool; +const encodingsMap = { __proto__: null }; +for (let i = 0; i < encodings.length; ++i) + encodingsMap[encodings[i]] = i; + function createPool() { poolSize = Buffer.poolSize; allocPool = createUnsafeBuffer(poolSize).buffer; @@ -202,16 +206,16 @@ function toInteger(n, defaultVal) { return defaultVal; } -function copyImpl(source, target, targetStart, sourceStart, sourceEnd) { - if (!ArrayBufferIsView(source)) +function _copy(source, target, targetStart, sourceStart, sourceEnd) { + if (!isUint8Array(source)) throw new ERR_INVALID_ARG_TYPE('source', ['Buffer', 'Uint8Array'], source); - if (!ArrayBufferIsView(target)) + if (!isUint8Array(target)) throw new ERR_INVALID_ARG_TYPE('target', ['Buffer', 'Uint8Array'], target); if (targetStart === undefined) { targetStart = 0; } else { - targetStart = NumberIsInteger(targetStart) ? 
targetStart : toInteger(targetStart, 0); + targetStart = toInteger(targetStart, 0); if (targetStart < 0) throw new ERR_OUT_OF_RANGE('targetStart', '>= 0', targetStart); } @@ -219,38 +223,38 @@ function copyImpl(source, target, targetStart, sourceStart, sourceEnd) { if (sourceStart === undefined) { sourceStart = 0; } else { - sourceStart = NumberIsInteger(sourceStart) ? sourceStart : toInteger(sourceStart, 0); - if (sourceStart < 0 || sourceStart > source.byteLength) - throw new ERR_OUT_OF_RANGE('sourceStart', `>= 0 && <= ${source.byteLength}`, sourceStart); + sourceStart = toInteger(sourceStart, 0); + if (sourceStart < 0 || sourceStart > source.length) + throw new ERR_OUT_OF_RANGE('sourceStart', `>= 0 && <= ${source.length}`, sourceStart); } if (sourceEnd === undefined) { - sourceEnd = source.byteLength; + sourceEnd = source.length; } else { - sourceEnd = NumberIsInteger(sourceEnd) ? sourceEnd : toInteger(sourceEnd, 0); + sourceEnd = toInteger(sourceEnd, 0); if (sourceEnd < 0) throw new ERR_OUT_OF_RANGE('sourceEnd', '>= 0', sourceEnd); } - if (targetStart >= target.byteLength || sourceStart >= sourceEnd) + if (targetStart >= target.length || sourceStart >= sourceEnd) return 0; return _copyActual(source, target, targetStart, sourceStart, sourceEnd); } function _copyActual(source, target, targetStart, sourceStart, sourceEnd) { - if (sourceEnd - sourceStart > target.byteLength - targetStart) - sourceEnd = sourceStart + target.byteLength - targetStart; + if (sourceEnd - sourceStart > target.length - targetStart) + sourceEnd = sourceStart + target.length - targetStart; let nb = sourceEnd - sourceStart; - const sourceLen = source.byteLength - sourceStart; + const sourceLen = source.length - sourceStart; if (nb > sourceLen) nb = sourceLen; - if (nb <= 0) - return 0; + if (sourceStart !== 0 || sourceEnd < source.length) + source = new Uint8Array(source.buffer, source.byteOffset + sourceStart, nb); - _copy(source, target, targetStart, sourceStart, nb); + 
TypedArrayPrototypeSet(target, source, targetStart); return nb; } @@ -620,7 +624,7 @@ const encodingOps = { encoding: 'utf8', encodingVal: encodingsMap.utf8, byteLength: byteLengthUtf8, - write: (buf, string, offset, len) => buf.utf8Write(string, offset, len), + write: (buf, string, offset, len) => bufferBinding.utf8WriteStatic(buf, string, offset, len), slice: (buf, start, end) => buf.utf8Slice(start, end), indexOf: (buf, val, byteOffset, dir) => indexOfString(buf, val, byteOffset, encodingsMap.utf8, dir), @@ -647,7 +651,7 @@ const encodingOps = { encoding: 'latin1', encodingVal: encodingsMap.latin1, byteLength: (string) => string.length, - write: (buf, string, offset, len) => buf.latin1Write(string, offset, len), + write: (buf, string, offset, len) => bufferBinding.latin1WriteStatic(buf, string, offset, len), slice: (buf, start, end) => buf.latin1Slice(start, end), indexOf: (buf, val, byteOffset, dir) => indexOfString(buf, val, byteOffset, encodingsMap.latin1, dir), @@ -656,7 +660,7 @@ const encodingOps = { encoding: 'ascii', encodingVal: encodingsMap.ascii, byteLength: (string) => string.length, - write: (buf, string, offset, len) => buf.asciiWrite(string, offset, len), + write: (buf, string, offset, len) => bufferBinding.asciiWriteStatic(buf, string, offset, len), slice: (buf, start, end) => buf.asciiSlice(start, end), indexOf: (buf, val, byteOffset, dir) => indexOfBuffer(buf, @@ -804,7 +808,7 @@ ObjectDefineProperty(Buffer.prototype, 'offset', { Buffer.prototype.copy = function copy(target, targetStart, sourceStart, sourceEnd) { - return copyImpl(this, target, targetStart, sourceStart, sourceEnd); + return _copy(this, target, targetStart, sourceStart, sourceEnd); }; // No need to verify that "buf.length <= MAX_UINT32" since it's a read-only @@ -1253,33 +1257,94 @@ function btoa(input) { if (arguments.length === 0) { throw new ERR_MISSING_ARGS('input'); } - const result = _btoa(`${input}`); - if (result === -1) { - throw lazyDOMException('Invalid character', 
'InvalidCharacterError'); + input = `${input}`; + for (let n = 0; n < input.length; n++) { + if (input[n].charCodeAt(0) > 0xff) + throw lazyDOMException('Invalid character', 'InvalidCharacterError'); } - return result; + const buf = Buffer.from(input, 'latin1'); + return buf.toString('base64'); } +// Refs: https://infra.spec.whatwg.org/#forgiving-base64-decode +const kForgivingBase64AllowedChars = [ + // ASCII whitespace + // Refs: https://infra.spec.whatwg.org/#ascii-whitespace + 0x09, 0x0A, 0x0C, 0x0D, 0x20, + + // Uppercase letters + ...ArrayFrom({ length: 26 }, (_, i) => StringPrototypeCharCodeAt('A') + i), + + // Lowercase letters + ...ArrayFrom({ length: 26 }, (_, i) => StringPrototypeCharCodeAt('a') + i), + + // Decimal digits + ...ArrayFrom({ length: 10 }, (_, i) => StringPrototypeCharCodeAt('0') + i), + + 0x2B, // + + 0x2F, // / + 0x3D, // = +]; +const kEqualSignIndex = ArrayPrototypeIndexOf(kForgivingBase64AllowedChars, + 0x3D); + function atob(input) { + // The implementation here has not been performance optimized in any way and + // should not be. + // Refs: https://github.com/nodejs/node/pull/38433#issuecomment-828426932 if (arguments.length === 0) { throw new ERR_MISSING_ARGS('input'); } - const result = _atob(`${input}`); + input = `${input}`; + let nonAsciiWhitespaceCharCount = 0; + let equalCharCount = 0; + + for (let n = 0; n < input.length; n++) { + const index = ArrayPrototypeIndexOf( + kForgivingBase64AllowedChars, + StringPrototypeCharCodeAt(input, n)); + + if (index > 4) { + // The first 5 elements of `kForgivingBase64AllowedChars` are + // ASCII whitespace char codes. + nonAsciiWhitespaceCharCount++; + + if (index === kEqualSignIndex) { + equalCharCount++; + } else if (equalCharCount) { + // The `=` char is only allowed at the end. 
+ throw lazyDOMException('Invalid character', 'InvalidCharacterError'); + } - switch (result) { - case -2: // Invalid character + if (equalCharCount > 2) { + // Only one more `=` is permitted after the first equal sign. + throw lazyDOMException('Invalid character', 'InvalidCharacterError'); + } + } else if (index === -1) { throw lazyDOMException('Invalid character', 'InvalidCharacterError'); - case -1: // Single character remained - throw lazyDOMException( - 'The string to be decoded is not correctly encoded.', - 'InvalidCharacterError'); - case -3: // Possible overflow - // TODO(@anonrig): Throw correct error in here. - throw lazyDOMException('The input causes overflow.', 'InvalidCharacterError'); - default: - return result; + } } + + let reminder = nonAsciiWhitespaceCharCount % 4; + + // See #2, #3, #4 - https://infra.spec.whatwg.org/#forgiving-base64 + if (!reminder) { + // Remove all trailing `=` characters and get the new remainder. + reminder = (nonAsciiWhitespaceCharCount - equalCharCount) % 4; + } else if (equalCharCount) { + // `=` should not be in the input if there's a remainder.
+ throw lazyDOMException('Invalid character', 'InvalidCharacterError'); + } + + // See #3 - https://infra.spec.whatwg.org/#forgiving-base64 + if (reminder === 1) { + throw lazyDOMException( + 'The string to be decoded is not correctly encoded.', + 'InvalidCharacterError'); + } + + return Buffer.from(input, 'base64').toString('latin1'); } function isUtf8(input) { @@ -1287,7 +1352,7 @@ function isUtf8(input) { return bindingIsUtf8(input); } - throw new ERR_INVALID_ARG_TYPE('input', ['ArrayBuffer', 'Buffer', 'TypedArray'], input); + throw new ERR_INVALID_ARG_TYPE('input', ['TypedArray', 'Buffer'], input); } function isAscii(input) { diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 6e141b974131cc..149cf24fb21a13 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -30,14 +30,13 @@ #include "env-inl.h" #include "simdutf.h" #include "string_bytes.h" - +#include "string_search.h" #include "util-inl.h" #include "v8-fast-api-calls.h" #include "v8.h" -#include #include -#include "nbytes.h" +#include #define THROW_AND_RETURN_UNLESS_BUFFER(env, obj) \ THROW_AND_RETURN_IF_NOT_BUFFER(env, obj, "argument") \ @@ -58,7 +57,6 @@ using v8::ArrayBufferView; using v8::BackingStore; using v8::Context; using v8::EscapableHandleScope; -using v8::FastApiTypedArray; using v8::FunctionCallbackInfo; using v8::Global; using v8::HandleScope; @@ -69,7 +67,6 @@ using v8::Just; using v8::Local; using v8::Maybe; using v8::MaybeLocal; -using v8::NewStringType; using v8::Nothing; using v8::Number; using v8::Object; @@ -327,13 +324,8 @@ MaybeLocal New(Isolate* isolate, CHECK(actual <= length); if (LIKELY(actual > 0)) { - if (actual < length) { - std::unique_ptr old_store = std::move(store); - store = ArrayBuffer::NewBackingStore(isolate, actual); - memcpy(static_cast(store->Data()), - static_cast(old_store->Data()), - actual); - } + if (actual < length) + store = BackingStore::Reallocate(isolate, std::move(store), actual); Local buf = ArrayBuffer::New(isolate, std::move(store)); Local 
obj; if (UNLIKELY(!New(isolate, buf, 0, actual).ToLocal(&obj))) @@ -576,40 +568,44 @@ void StringSlice(const FunctionCallbackInfo& args) { args.GetReturnValue().Set(ret); } -// Assume caller has properly validated args. -void SlowCopy(const FunctionCallbackInfo& args) { +// bytesCopied = copy(buffer, target[, targetStart][, sourceStart][, sourceEnd]) +void Copy(const FunctionCallbackInfo &args) { Environment* env = Environment::GetCurrent(args); + THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); + THROW_AND_RETURN_UNLESS_BUFFER(env, args[1]); ArrayBufferViewContents source(args[0]); - SPREAD_BUFFER_ARG(args[1].As(), target); + Local target_obj = args[1].As(); + SPREAD_BUFFER_ARG(target_obj, target); - const auto target_start = args[2]->Uint32Value(env->context()).ToChecked(); - const auto source_start = args[3]->Uint32Value(env->context()).ToChecked(); - const auto to_copy = args[4]->Uint32Value(env->context()).ToChecked(); + size_t target_start = 0; + size_t source_start = 0; + size_t source_end = 0; - memmove(target_data + target_start, source.data() + source_start, to_copy); - args.GetReturnValue().Set(to_copy); -} + THROW_AND_RETURN_IF_OOB(ParseArrayIndex(env, args[2], 0, &target_start)); + THROW_AND_RETURN_IF_OOB(ParseArrayIndex(env, args[3], 0, &source_start)); + THROW_AND_RETURN_IF_OOB(ParseArrayIndex(env, args[4], source.length(), + &source_end)); -// Assume caller has properly validated args. 
-uint32_t FastCopy(Local receiver, - const v8::FastApiTypedArray& source, - const v8::FastApiTypedArray& target, - uint32_t target_start, - uint32_t source_start, - uint32_t to_copy) { - uint8_t* source_data; - CHECK(source.getStorageIfAligned(&source_data)); + // Copy 0 bytes; we're done + if (target_start >= target_length || source_start >= source_end) + return args.GetReturnValue().Set(0); - uint8_t* target_data; - CHECK(target.getStorageIfAligned(&target_data)); + if (source_start > source.length()) + return THROW_ERR_OUT_OF_RANGE( + env, "The value of \"sourceStart\" is out of range."); - memmove(target_data + target_start, source_data + source_start, to_copy); + if (source_end - source_start > target_length - target_start) + source_end = source_start + target_length - target_start; - return to_copy; + uint32_t to_copy = std::min( + std::min(source_end - source_start, target_length - target_start), + source.length() - source_start); + + memmove(target_data + target_start, source.data() + source_start, to_copy); + args.GetReturnValue().Set(to_copy); } -static v8::CFunction fast_copy(v8::CFunction::Make(FastCopy)); void Fill(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); @@ -663,8 +659,8 @@ void Fill(const FunctionCallbackInfo& args) { } else if (enc == UCS2) { str_length = str_obj->Length() * sizeof(uint16_t); node::TwoByteValue str(env->isolate(), args[1]); - if constexpr (IsBigEndian()) - CHECK(nbytes::SwapBytes16(reinterpret_cast(&str[0]), str_length)); + if (IsBigEndian()) + SwapBytes16(reinterpret_cast(&str[0]), str_length); memcpy(ts_obj_data + start, *str, std::min(str_length, fill_length)); @@ -764,6 +760,60 @@ uint32_t FastByteLengthUtf8(Local receiver, static v8::CFunction fast_byte_length_utf8( v8::CFunction::Make(FastByteLengthUtf8)); + +template +void SlowWriteString(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + + THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); + 
SPREAD_BUFFER_ARG(args[0], ts_obj); + + THROW_AND_RETURN_IF_NOT_STRING(env, args[1], "argument"); + + Local str = args[1]->ToString(env->context()).ToLocalChecked(); + + size_t offset = 0; + size_t max_length = 0; + + THROW_AND_RETURN_IF_OOB(ParseArrayIndex(env, args[2], 0, &offset)); + if (offset > ts_obj_length) { + return node::THROW_ERR_BUFFER_OUT_OF_BOUNDS( + env, "\"offset\" is outside of buffer bounds"); + } + + THROW_AND_RETURN_IF_OOB(ParseArrayIndex(env, args[3], ts_obj_length - offset, + &max_length)); + + max_length = std::min(ts_obj_length - offset, max_length); + + if (max_length == 0) + return args.GetReturnValue().Set(0); + + uint32_t written = StringBytes::Write( + env->isolate(), ts_obj_data + offset, max_length, str, encoding); + args.GetReturnValue().Set(written); +} + +// Fast API path: copies the bytes of the one-byte string `src` into `dst` +// starting at `offset`, writing at most `max_length` bytes, and returns the +// number of bytes actually written. +// NOTE(review): the bytes are copied verbatim. The utf8WriteStatic binding +// also uses this callback; one-byte characters above 0x7f take two bytes in +// UTF-8, so that case looks wrong - confirm before relying on it. +uint32_t FastWriteString(Local receiver, + const v8::FastApiTypedArray& dst, + const v8::FastOneByteString& src, + uint32_t offset, + uint32_t max_length) { + uint8_t* dst_data; + CHECK(dst.getStorageIfAligned(&dst_data)); + + // No JS exception is raised from here, so an out-of-range offset is a hard + // failure instead of the ERR_BUFFER_OUT_OF_BOUNDS thrown by the slow path. + CHECK_LE(offset, dst.length()); + + // Clamp the byte count so we neither write past the end of `dst` nor read + // past the end of `src`. + const size_t available = dst.length() - offset; + if (available < max_length) + max_length = static_cast<uint32_t>(available); + max_length = std::min(max_length, src.length); + + memcpy(dst_data + offset, src.data, max_length); + + return max_length; +} + +static v8::CFunction fast_write_string( + v8::CFunction::Make(FastWriteString)); + // Normalize val to be an integer in the range of [1, -1] since // implementations of memcmp() can vary by platform. static int normalizeCompareVal(int val, size_t a_length, size_t b_length) { @@ -841,23 +891,6 @@ void Compare(const FunctionCallbackInfo &args) { args.GetReturnValue().Set(val); } -int32_t FastCompare(v8::Local, - const FastApiTypedArray& a, - const FastApiTypedArray& b) { - uint8_t* data_a; - uint8_t* data_b; - CHECK(a.getStorageIfAligned(&data_a)); - CHECK(b.getStorageIfAligned(&data_b)); - - size_t cmp_length = std::min(a.length(), b.length()); - - return normalizeCompareVal( - cmp_length > 0 ?
memcmp(data_a, data_b, cmp_length) : 0, - a.length(), - b.length()); -} - -static v8::CFunction fast_compare(v8::CFunction::Make(FastCompare)); // Computes the offset for starting an indexOf or lastIndexOf search. // Returns either a valid offset in [0...], ie inside the Buffer, @@ -957,7 +990,7 @@ void IndexOfString(const FunctionCallbackInfo& args) { return args.GetReturnValue().Set(-1); } - if constexpr (IsBigEndian()) { + if (IsBigEndian()) { StringBytes::InlineDecoder decoder; if (decoder.Decode(env, needle, enc).IsNothing()) return; const uint16_t* decoded_string = @@ -966,20 +999,19 @@ void IndexOfString(const FunctionCallbackInfo& args) { if (decoded_string == nullptr) return args.GetReturnValue().Set(-1); - result = nbytes::SearchString(reinterpret_cast(haystack), - haystack_length / 2, - decoded_string, - decoder.size() / 2, - offset / 2, - is_forward); + result = SearchString(reinterpret_cast(haystack), + haystack_length / 2, + decoded_string, + decoder.size() / 2, + offset / 2, + is_forward); } else { - result = - nbytes::SearchString(reinterpret_cast(haystack), - haystack_length / 2, - reinterpret_cast(*needle_value), - needle_value.length(), - offset / 2, - is_forward); + result = SearchString(reinterpret_cast(haystack), + haystack_length / 2, + reinterpret_cast(*needle_value), + needle_value.length(), + offset / 2, + is_forward); } result *= 2; } else if (enc == UTF8) { @@ -987,13 +1019,12 @@ void IndexOfString(const FunctionCallbackInfo& args) { if (*needle_value == nullptr) return args.GetReturnValue().Set(-1); - result = - nbytes::SearchString(reinterpret_cast(haystack), - haystack_length, - reinterpret_cast(*needle_value), - needle_length, - offset, - is_forward); + result = SearchString(reinterpret_cast(haystack), + haystack_length, + reinterpret_cast(*needle_value), + needle_length, + offset, + is_forward); } else if (enc == LATIN1) { uint8_t* needle_data = node::UncheckedMalloc(needle_length); if (needle_data == nullptr) { @@ -1002,12 +1033,12 
@@ void IndexOfString(const FunctionCallbackInfo& args) { needle->WriteOneByte( isolate, needle_data, 0, needle_length, String::NO_NULL_TERMINATION); - result = nbytes::SearchString(reinterpret_cast(haystack), - haystack_length, - needle_data, - needle_length, - offset, - is_forward); + result = SearchString(reinterpret_cast(haystack), + haystack_length, + needle_data, + needle_length, + offset, + is_forward); free(needle_data); } @@ -1066,83 +1097,65 @@ void IndexOfBuffer(const FunctionCallbackInfo& args) { if (haystack_length < 2 || needle_length < 2) { return args.GetReturnValue().Set(-1); } - result = nbytes::SearchString(reinterpret_cast(haystack), - haystack_length / 2, - reinterpret_cast(needle), - needle_length / 2, - offset / 2, - is_forward); + result = SearchString( + reinterpret_cast(haystack), + haystack_length / 2, + reinterpret_cast(needle), + needle_length / 2, + offset / 2, + is_forward); result *= 2; } else { - result = nbytes::SearchString(reinterpret_cast(haystack), - haystack_length, - reinterpret_cast(needle), - needle_length, - offset, - is_forward); + result = SearchString( + reinterpret_cast(haystack), + haystack_length, + reinterpret_cast(needle), + needle_length, + offset, + is_forward); } args.GetReturnValue().Set( result == haystack_length ? -1 : static_cast(result)); } -int32_t IndexOfNumber(const uint8_t* buffer_data, - size_t buffer_length, - uint32_t needle, - int64_t offset_i64, - bool is_forward) { - int64_t opt_offset = IndexOfOffset(buffer_length, offset_i64, 1, is_forward); - if (opt_offset <= -1 || buffer_length == 0) { - return -1; - } - size_t offset = static_cast(opt_offset); - CHECK_LT(offset, buffer_length); - - const void* ptr; - if (is_forward) { - ptr = memchr(buffer_data + offset, needle, buffer_length - offset); - } else { - ptr = nbytes::stringsearch::MemrchrFill(buffer_data, needle, offset + 1); - } - const uint8_t* ptr_uint8 = static_cast(ptr); - return ptr != nullptr ? 
static_cast(ptr_uint8 - buffer_data) : -1; -} - -void SlowIndexOfNumber(const FunctionCallbackInfo& args) { +void IndexOfNumber(const FunctionCallbackInfo& args) { CHECK(args[1]->IsUint32()); CHECK(args[2]->IsNumber()); CHECK(args[3]->IsBoolean()); THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]); - ArrayBufferViewContents buffer(args[0]); + ArrayBufferViewContents buffer(args[0]); uint32_t needle = args[1].As()->Value(); int64_t offset_i64 = args[2].As()->Value(); bool is_forward = args[3]->IsTrue(); - args.GetReturnValue().Set(IndexOfNumber( - buffer.data(), buffer.length(), needle, offset_i64, is_forward)); -} + int64_t opt_offset = + IndexOfOffset(buffer.length(), offset_i64, 1, is_forward); + if (opt_offset <= -1 || buffer.length() == 0) { + return args.GetReturnValue().Set(-1); + } + size_t offset = static_cast(opt_offset); + CHECK_LT(offset, buffer.length()); -int32_t FastIndexOfNumber(v8::Local, - const FastApiTypedArray& buffer, - uint32_t needle, - int64_t offset_i64, - bool is_forward) { - uint8_t* buffer_data; - CHECK(buffer.getStorageIfAligned(&buffer_data)); - return IndexOfNumber( - buffer_data, buffer.length(), needle, offset_i64, is_forward); + const void* ptr; + if (is_forward) { + ptr = memchr(buffer.data() + offset, needle, buffer.length() - offset); + } else { + ptr = node::stringsearch::MemrchrFill(buffer.data(), needle, offset + 1); + } + const char* ptr_char = static_cast(ptr); + args.GetReturnValue().Set(ptr ? 
static_cast(ptr_char - buffer.data()) + : -1); } -static v8::CFunction fast_index_of_number( - v8::CFunction::Make(FastIndexOfNumber)); void Swap16(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); SPREAD_BUFFER_ARG(args[0], ts_obj); - CHECK(nbytes::SwapBytes16(ts_obj_data, ts_obj_length)); + SwapBytes16(ts_obj_data, ts_obj_length); args.GetReturnValue().Set(args[0]); } @@ -1151,7 +1164,7 @@ void Swap32(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); SPREAD_BUFFER_ARG(args[0], ts_obj); - CHECK(nbytes::SwapBytes32(ts_obj_data, ts_obj_length)); + SwapBytes32(ts_obj_data, ts_obj_length); args.GetReturnValue().Set(args[0]); } @@ -1160,7 +1173,7 @@ void Swap64(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); SPREAD_BUFFER_ARG(args[0], ts_obj); - CHECK(nbytes::SwapBytes64(ts_obj_data, ts_obj_length)); + SwapBytes64(ts_obj_data, ts_obj_length); args.GetReturnValue().Set(args[0]); } @@ -1246,130 +1259,6 @@ void DetachArrayBuffer(const FunctionCallbackInfo& args) { } } -static void Btoa(const FunctionCallbackInfo& args) { - CHECK_EQ(args.Length(), 1); - Environment* env = Environment::GetCurrent(args); - THROW_AND_RETURN_IF_NOT_STRING(env, args[0], "argument"); - - Local input = args[0].As(); - MaybeStackBuffer buffer; - size_t written; - - if (input->IsExternalOneByte()) { // 8-bit case - auto ext = input->GetExternalOneByteStringResource(); - size_t expected_length = simdutf::base64_length_from_binary(ext->length()); - buffer.AllocateSufficientStorage(expected_length + 1); - buffer.SetLengthAndZeroTerminate(expected_length); - written = - simdutf::binary_to_base64(ext->data(), ext->length(), buffer.out()); - } else if (input->IsOneByte()) { - MaybeStackBuffer stack_buf(input->Length()); - input->WriteOneByte(env->isolate(), - 
stack_buf.out(), - 0, - input->Length(), - String::NO_NULL_TERMINATION); - - size_t expected_length = - simdutf::base64_length_from_binary(input->Length()); - buffer.AllocateSufficientStorage(expected_length + 1); - buffer.SetLengthAndZeroTerminate(expected_length); - written = - simdutf::binary_to_base64(reinterpret_cast(*stack_buf), - input->Length(), - buffer.out()); - } else { - String::Value value(env->isolate(), input); - MaybeStackBuffer stack_buf(value.length()); - size_t out_len = simdutf::convert_utf16_to_latin1( - reinterpret_cast(*value), - value.length(), - stack_buf.out()); - if (out_len == 0) { // error - return args.GetReturnValue().Set(-1); - } - size_t expected_length = simdutf::base64_length_from_binary(out_len); - buffer.AllocateSufficientStorage(expected_length + 1); - buffer.SetLengthAndZeroTerminate(expected_length); - written = simdutf::binary_to_base64(*stack_buf, out_len, buffer.out()); - } - - auto value = - String::NewFromOneByte(env->isolate(), - reinterpret_cast(buffer.out()), - NewStringType::kNormal, - written) - .ToLocalChecked(); - return args.GetReturnValue().Set(value); -} - -// In case of success, the decoded string is returned. -// In case of error, a negative value is returned: -// * -1 indicates a single character remained, -// * -2 indicates an invalid character, -// * -3 indicates a possible overflow (i.e., more than 2 GB output). 
-static void Atob(const FunctionCallbackInfo& args) { - CHECK_EQ(args.Length(), 1); - Environment* env = Environment::GetCurrent(args); - THROW_AND_RETURN_IF_NOT_STRING(env, args[0], "argument"); - - Local input = args[0].As(); - MaybeStackBuffer buffer; - simdutf::result result; - - if (input->IsExternalOneByte()) { // 8-bit case - auto ext = input->GetExternalOneByteStringResource(); - size_t expected_length = - simdutf::maximal_binary_length_from_base64(ext->data(), ext->length()); - buffer.AllocateSufficientStorage(expected_length); - buffer.SetLength(expected_length); - result = simdutf::base64_to_binary( - ext->data(), ext->length(), buffer.out(), simdutf::base64_default); - } else if (input->IsOneByte()) { - MaybeStackBuffer stack_buf(input->Length()); - input->WriteOneByte(args.GetIsolate(), - stack_buf.out(), - 0, - input->Length(), - String::NO_NULL_TERMINATION); - const char* data = reinterpret_cast(*stack_buf); - size_t expected_length = - simdutf::maximal_binary_length_from_base64(data, input->Length()); - buffer.AllocateSufficientStorage(expected_length); - buffer.SetLength(expected_length); - result = simdutf::base64_to_binary(data, input->Length(), buffer.out()); - } else { // 16-bit case - String::Value value(env->isolate(), input); - auto data = reinterpret_cast(*value); - size_t expected_length = - simdutf::maximal_binary_length_from_base64(data, value.length()); - buffer.AllocateSufficientStorage(expected_length); - buffer.SetLength(expected_length); - result = simdutf::base64_to_binary(data, value.length(), buffer.out()); - } - - if (result.error == simdutf::error_code::SUCCESS) { - auto value = - String::NewFromOneByte(env->isolate(), - reinterpret_cast(buffer.out()), - NewStringType::kNormal, - result.count) - .ToLocalChecked(); - return args.GetReturnValue().Set(value); - } - - // Default value is: "possible overflow" - int32_t error_code = -3; - - if (result.error == simdutf::error_code::INVALID_BASE64_CHARACTER) { - error_code = -2; - } 
else if (result.error == simdutf::error_code::BASE64_INPUT_REMAINDER) { - error_code = -1; - } - - args.GetReturnValue().Set(error_code); -} - namespace { std::pair DecomposeBufferToParts(Local buffer) { @@ -1432,9 +1321,6 @@ void Initialize(Local target, Environment* env = Environment::GetCurrent(context); Isolate* isolate = env->isolate(); - SetMethodNoSideEffect(context, target, "atob", Atob); - SetMethodNoSideEffect(context, target, "btoa", Btoa); - SetMethod(context, target, "setBufferPrototype", SetBufferPrototype); SetMethodNoSideEffect(context, target, "createFromString", CreateFromString); @@ -1443,16 +1329,12 @@ void Initialize(Local target, "byteLengthUtf8", SlowByteLengthUtf8, &fast_byte_length_utf8); - SetFastMethod(context, target, "copy", SlowCopy, &fast_copy); - SetFastMethodNoSideEffect(context, target, "compare", Compare, &fast_compare); + SetMethod(context, target, "copy", Copy); + SetMethodNoSideEffect(context, target, "compare", Compare); SetMethodNoSideEffect(context, target, "compareOffset", CompareOffset); SetMethod(context, target, "fill", Fill); SetMethodNoSideEffect(context, target, "indexOfBuffer", IndexOfBuffer); - SetFastMethodNoSideEffect(context, - target, - "indexOfNumber", - SlowIndexOfNumber, - &fast_index_of_number); + SetMethodNoSideEffect(context, target, "indexOfNumber", IndexOfNumber); SetMethodNoSideEffect(context, target, "indexOfString", IndexOfString); SetMethod(context, target, "detachArrayBuffer", DetachArrayBuffer); @@ -1494,6 +1376,22 @@ void Initialize(Local target, SetMethod(context, target, "ucs2Write", StringWrite); SetMethod(context, target, "utf8Write", StringWrite); + SetFastMethod(context, + target, + "asciiWriteStatic", + SlowWriteString, + &fast_write_string); + SetFastMethod(context, + target, + "latin1WriteStatic", + SlowWriteString, + &fast_write_string); + SetFastMethod(context, + target, + "utf8WriteStatic", + SlowWriteString, + &fast_write_string); + SetMethod(context, target, "getZeroFillToggle", 
GetZeroFillToggle); } @@ -1506,18 +1404,12 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(SlowByteLengthUtf8); registry->Register(fast_byte_length_utf8.GetTypeInfo()); registry->Register(FastByteLengthUtf8); - registry->Register(SlowCopy); - registry->Register(fast_copy.GetTypeInfo()); - registry->Register(FastCopy); + registry->Register(Copy); registry->Register(Compare); - registry->Register(FastCompare); - registry->Register(fast_compare.GetTypeInfo()); registry->Register(CompareOffset); registry->Register(Fill); registry->Register(IndexOfBuffer); - registry->Register(SlowIndexOfNumber); - registry->Register(FastIndexOfNumber); - registry->Register(fast_index_of_number.GetTypeInfo()); + registry->Register(IndexOfNumber); registry->Register(IndexOfString); registry->Register(Swap16); @@ -1535,6 +1427,9 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(StringSlice); registry->Register(StringSlice); + registry->Register(SlowWriteString); + registry->Register(fast_write_string.GetTypeInfo()); + registry->Register(FastWriteString); registry->Register(StringWrite); registry->Register(StringWrite); registry->Register(StringWrite); @@ -1546,9 +1441,6 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(DetachArrayBuffer); registry->Register(CopyArrayBuffer); - - registry->Register(Atob); - registry->Register(Btoa); } } // namespace Buffer diff --git a/src/node_external_reference.h b/src/node_external_reference.h index b59a3a9e9c957a..d4b57c533a04f9 100644 --- a/src/node_external_reference.h +++ b/src/node_external_reference.h @@ -56,6 +56,13 @@ using CFunctionWithInt64Fallback = void (*)(v8::Local, v8::FastApiCallbackOptions&); using CFunctionWithBool = void (*)(v8::Local, bool); +using CFunctionWriteString = + uint32_t (*)(v8::Local receiver, + const v8::FastApiTypedArray& dst, + const v8::FastOneByteString& src, + uint32_t offset, + uint32_t 
max_length); + using CFunctionBufferCopy = uint32_t (*)(v8::Local receiver, const v8::FastApiTypedArray& source, @@ -87,6 +94,7 @@ class ExternalReferenceRegistry { V(CFunctionWithDoubleReturnDouble) \ V(CFunctionWithInt64Fallback) \ V(CFunctionWithBool) \ + V(CFunctionWriteString) \ V(CFunctionBufferCopy) \ V(const v8::CFunctionInfo*) \ V(v8::FunctionCallback) \