Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

src: improve buffer.transcode performance #54153

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions benchmark/buffers/buffer-transcode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
'use strict';
const common = require('../common.js');
const assert = require('node:assert');
const buffer = require('node:buffer');

const hasIntl = !!process.config.variables.v8_enable_i18n_support;
const encodings = ['latin1', 'ascii', 'ucs2', 'utf8'];

if (!hasIntl) {
console.log('Skipping: `transcode` is only available on platforms that support i18n`');
process.exit(0);
}

const bench = common.createBenchmark(main, {
fromEncoding: encodings,
toEncoding: encodings,
length: [1, 10, 1000],
n: [1e5],
}, {
combinationFilter(p) {
return !(p.fromEncoding === 'ucs2' && p.toEncoding === 'utf8');
},
});

function main({ n, fromEncoding, toEncoding, length }) {
const input = Buffer.from('a'.repeat(length));
let out = 0;
bench.start();
for (let i = 0; i < n; i++) {
const dest = buffer.transcode(input, fromEncoding, toEncoding);
out += dest.buffer.byteLength;
}
bench.end(n);
assert.ok(out >= 0);
}
107 changes: 44 additions & 63 deletions src/node_i18n.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@

#include "node_i18n.h"
#include "node_external_reference.h"
#include "simdutf.h"

#if defined(NODE_HAVE_I18N_SUPPORT)

Expand Down Expand Up @@ -147,7 +148,6 @@ MaybeLocal<Object> Transcode(Environment* env,
const char* source,
const size_t source_length,
UErrorCode* status) {
*status = U_ZERO_ERROR;
MaybeLocal<Object> ret;
MaybeStackBuffer<char> result;
Converter to(toEncoding);
Expand All @@ -170,22 +170,21 @@ MaybeLocal<Object> Transcode(Environment* env,
return ret;
}

MaybeLocal<Object> TranscodeToUcs2(Environment* env,
const char* fromEncoding,
const char* toEncoding,
const char* source,
const size_t source_length,
UErrorCode* status) {
*status = U_ZERO_ERROR;
MaybeLocal<Object> ret;
MaybeLocal<Object> TranscodeLatin1ToUcs2(Environment* env,
const char* fromEncoding,
const char* toEncoding,
const char* source,
const size_t source_length,
UErrorCode* status) {
MaybeStackBuffer<UChar> destbuf(source_length);
Converter from(fromEncoding);
const size_t length_in_chars = source_length * sizeof(UChar);
ucnv_toUChars(from.conv(), *destbuf, length_in_chars,
source, source_length, status);
if (U_SUCCESS(*status))
ret = ToBufferEndian(env, &destbuf);
return ret;
auto actual_length =
simdutf::convert_latin1_to_utf16le(source, source_length, destbuf.out());
if (actual_length == 0) {
*status = U_INVALID_CHAR_FOUND;
return {};
}

return Buffer::New(env, &destbuf);
}

MaybeLocal<Object> TranscodeFromUcs2(Environment* env,
Expand All @@ -194,13 +193,11 @@ MaybeLocal<Object> TranscodeFromUcs2(Environment* env,
const char* source,
const size_t source_length,
UErrorCode* status) {
*status = U_ZERO_ERROR;
MaybeStackBuffer<UChar> sourcebuf;
MaybeLocal<Object> ret;
Converter to(toEncoding);

size_t sublen = ucnv_getMinCharSize(to.conv());
std::string sub(sublen, '?');
std::string sub(to.min_char_size(), '?');
to.set_subst_chars(sub.c_str());

const size_t length_in_chars = source_length / sizeof(UChar);
Expand All @@ -221,26 +218,18 @@ MaybeLocal<Object> TranscodeUcs2FromUtf8(Environment* env,
const char* source,
const size_t source_length,
UErrorCode* status) {
*status = U_ZERO_ERROR;
MaybeStackBuffer<UChar> destbuf;
int32_t result_length;
u_strFromUTF8(*destbuf, destbuf.capacity(), &result_length,
source, source_length, status);
MaybeLocal<Object> ret;
if (U_SUCCESS(*status)) {
destbuf.SetLength(result_length);
ret = ToBufferEndian(env, &destbuf);
} else if (*status == U_BUFFER_OVERFLOW_ERROR) {
*status = U_ZERO_ERROR;
destbuf.AllocateSufficientStorage(result_length);
u_strFromUTF8(*destbuf, result_length, &result_length,
source, source_length, status);
if (U_SUCCESS(*status)) {
destbuf.SetLength(result_length);
ret = ToBufferEndian(env, &destbuf);
}
size_t expected_utf16_length =
simdutf::utf16_length_from_utf8(source, source_length);
MaybeStackBuffer<UChar> destbuf(expected_utf16_length);
auto actual_length =
simdutf::convert_utf8_to_utf16le(source, source_length, destbuf.out());

if (actual_length == 0) {
*status = U_INVALID_CHAR_FOUND;
return {};
}
return ret;

return Buffer::New(env, &destbuf);
}

MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
Expand All @@ -249,32 +238,25 @@ MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
const char* source,
const size_t source_length,
UErrorCode* status) {
*status = U_ZERO_ERROR;
MaybeLocal<Object> ret;
const size_t length_in_chars = source_length / sizeof(UChar);
int32_t result_length;
MaybeStackBuffer<UChar> sourcebuf;
MaybeStackBuffer<char> destbuf;
CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars);
u_strToUTF8(*destbuf, destbuf.capacity(), &result_length,
*sourcebuf, length_in_chars, status);
if (U_SUCCESS(*status)) {
destbuf.SetLength(result_length);
ret = ToBufferEndian(env, &destbuf);
} else if (*status == U_BUFFER_OVERFLOW_ERROR) {
*status = U_ZERO_ERROR;
destbuf.AllocateSufficientStorage(result_length);
u_strToUTF8(*destbuf, result_length, &result_length, *sourcebuf,
length_in_chars, status);
if (U_SUCCESS(*status)) {
destbuf.SetLength(result_length);
ret = ToBufferEndian(env, &destbuf);
}
size_t expected_utf8_length = simdutf::utf8_length_from_utf16le(
reinterpret_cast<const char16_t*>(source), length_in_chars);

MaybeStackBuffer<char> destbuf(expected_utf8_length);
auto actual_length = simdutf::convert_utf16le_to_utf8(
reinterpret_cast<const char16_t*>(source),
length_in_chars,
destbuf.out());

if (actual_length == 0) {
*status = U_INVALID_CHAR_FOUND;
return {};
}
return ret;

return Buffer::New(env, &destbuf);
}

const char* EncodingName(const enum encoding encoding) {
constexpr const char* EncodingName(const enum encoding encoding) {
switch (encoding) {
case ASCII: return "us-ascii";
case LATIN1: return "iso8859-1";
Expand All @@ -284,7 +266,7 @@ const char* EncodingName(const enum encoding encoding) {
}
}

bool SupportedEncoding(const enum encoding encoding) {
constexpr bool SupportedEncoding(const enum encoding encoding) {
switch (encoding) {
case ASCII:
case LATIN1:
Expand All @@ -309,8 +291,7 @@ void Transcode(const FunctionCallbackInfo<Value>&args) {
switch (fromEncoding) {
case ASCII:
case LATIN1:
if (toEncoding == UCS2)
tfn = &TranscodeToUcs2;
if (toEncoding == UCS2) tfn = &TranscodeLatin1ToUcs2;
break;
case UTF8:
if (toEncoding == UCS2)
Expand Down
Loading