From 4fe9aa656e6238350c40ea4a62de97047e91efe0 Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Mon, 8 Apr 2024 14:05:25 -0400 Subject: [PATCH] buffer: improve `base64` and `base64url` performance Co-authored-by: Daniel Lemire --- src/string_bytes.cc | 104 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 90 insertions(+), 14 deletions(-) diff --git a/src/string_bytes.cc b/src/string_bytes.cc index b3c0a90b548c70..a92821d1f3c4be 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -346,14 +346,78 @@ size_t StringBytes::Write(Isolate* isolate, } case BASE64URL: - // Fall through + if (str->IsExternalOneByte()) { // 8-bit case + auto ext = str->GetExternalOneByteStringResource(); + // Try with WHATWG base64 standard first, adapted for base64url + simdutf::result r = simdutf::base64_to_binary_safe( + ext->data(), ext->length(), buf, buflen, simdutf::base64_url); + if (r.error == simdutf::error_code::SUCCESS) { + nbytes = buflen; + } else { + // The input does not follow the WHATWG forgiving-base64 specification + // adapted for base64url + // https://infra.spec.whatwg.org/#forgiving-base64-decode + nbytes = base64_decode(buf, buflen, ext->data(), ext->length()); + } + } else { // 16-bit case + // Typically, a base64url string is stored as an 8-bit string within v8. + // Thus str->IsOneByte() is typically true. The next line thus often + // allocates a temporary 16-bit buffer to store a 16-bit copy of the + // 8-bit v8 string. Hence the creation of the String::Value value is + // likely a performance bottleneck. + String::Value value(isolate, str); + // Try with WHATWG base64 standard first + simdutf::result r = simdutf::base64_to_binary_safe( + reinterpret_cast(*value), + value.length(), + buf, + buflen, + simdutf::base64_url); + if (r.error == simdutf::error_code::SUCCESS) { + nbytes = buflen; + } else { + // The input does not follow the WHATWG forgiving-base64 specification + // (adapted for base64url with + and / replaced by - and _). 
+ // https://infra.spec.whatwg.org/#forgiving-base64-decode + nbytes = base64_decode(buf, buflen, *value, value.length()); + } + } + break; + case BASE64: - if (str->IsExternalOneByte()) { + if (str->IsExternalOneByte()) { // 8-bit case auto ext = str->GetExternalOneByteStringResource(); - nbytes = base64_decode(buf, buflen, ext->data(), ext->length()); - } else { + // Try with WHATWG base64 standard first + auto result = simdutf::base64_to_binary_safe( + ext->data(), ext->length(), buf, buflen, simdutf::base64_default); + if (result.error == simdutf::error_code::SUCCESS) { + nbytes = buflen; + } else { + // The input does not follow the WHATWG forgiving-base64 specification + // https://infra.spec.whatwg.org/#forgiving-base64-decode + nbytes = base64_decode(buf, buflen, ext->data(), ext->length()); + } + } else { // 16-bit case + // Typically, a base64 string is stored as an 8-bit string within v8. + // Thus str->IsOneByte() is typically true. The next line thus often + // allocates a temporary 16-bit buffer to store a 16-bit copy of the + // 8-bit v8 string. Hence, the creation of the String::Value value is + // likely a performance bottleneck. 
String::Value value(isolate, str); - nbytes = base64_decode(buf, buflen, *value, value.length()); + // Try with WHATWG base64 standard first + auto result = simdutf::base64_to_binary_safe( + reinterpret_cast(*value), + value.length(), + buf, + buflen, + simdutf::base64_default); + if (result.error == simdutf::error_code::SUCCESS) { + nbytes = buflen; + } else { + // The input does not follow the WHATWG forgiving-base64 specification + // https://infra.spec.whatwg.org/#forgiving-base64-decode + nbytes = base64_decode(buf, buflen, *value, value.length()); + } } break; @@ -411,9 +475,12 @@ Maybe StringBytes::StorageSize(Isolate* isolate, break; case BASE64URL: - // Fall through + data_size = simdutf::base64_length_from_binary(str->Length(), + simdutf::base64_url); + break; + case BASE64: - data_size = base64_decoded_size_fast(str->Length()); + data_size = simdutf::base64_length_from_binary(str->Length()); break; case HEX: @@ -452,11 +519,16 @@ Maybe StringBytes::Size(Isolate* isolate, case UCS2: return Just(str->Length() * sizeof(uint16_t)); - case BASE64URL: - // Fall through + case BASE64URL: { + String::Value value(isolate, str); + return Just(simdutf::base64_length_from_binary(value.length(), + simdutf::base64_url)); + } + case BASE64: { String::Value value(isolate, str); - return Just(base64_decoded_size(*value, value.length())); + return Just(simdutf::base64_length_from_binary(value.length(), + simdutf::base64_default)); } case HEX: @@ -609,28 +681,32 @@ MaybeLocal StringBytes::Encode(Isolate* isolate, return ExternOneByteString::NewFromCopy(isolate, buf, buflen, error); case BASE64: { - size_t dlen = base64_encoded_size(buflen); + size_t dlen = + simdutf::base64_length_from_binary(buflen, simdutf::base64_default); char* dst = node::UncheckedMalloc(dlen); if (dst == nullptr) { *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate); return MaybeLocal(); } - size_t written = base64_encode(buf, buflen, dst, dlen); + size_t written = + simdutf::binary_to_base64(buf, buflen,
dst, simdutf::base64_default); CHECK_EQ(written, dlen); return ExternOneByteString::New(isolate, dst, dlen, error); } case BASE64URL: { - size_t dlen = base64_encoded_size(buflen, Base64Mode::URL); + size_t dlen = + simdutf::base64_length_from_binary(buflen, simdutf::base64_url); char* dst = node::UncheckedMalloc(dlen); if (dst == nullptr) { *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate); return MaybeLocal(); } - size_t written = base64_encode(buf, buflen, dst, dlen, Base64Mode::URL); + size_t written = + simdutf::binary_to_base64(buf, buflen, dst, simdutf::base64_url); CHECK_EQ(written, dlen); return ExternOneByteString::New(isolate, dst, dlen, error);