Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add buffer.transcode to nodejs_compat #2462

Merged
merged 2 commits into from
Aug 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
default:
anonrig marked this conversation as resolved.
Show resolved Hide resolved
@just --list

# Override this value by calling `just --set clang_version 18`
clang_version := "15"

# Install the `gen-compile-commands` tool via cargo.
# NOTE(review): presumably a prerequisite of the `compile-commands` recipe — confirm.
prepare:
cargo install gen-compile-commands

Expand All @@ -6,3 +12,15 @@ compile-commands:

# Delete compile_commands.json; `-f` keeps this from failing when the file does not exist.
clean:
rm -f compile_commands.json

# Run `bazel build` on the given targets/flags (default: `//...`), exporting
# CC/CXX to the build actions as clang-{{clang_version}} / clang++-{{clang_version}}.
build *args="//...":
bazel build {{args}} --action_env=CC=clang-{{clang_version}} --action_env=CXX=clang++-{{clang_version}}

# Delegate to the `build` recipe with `--config=asan --sandbox_debug` appended
# after the given targets/flags (default: `//...`).
# NOTE(review): `asan` is presumably the AddressSanitizer Bazel config — confirm in .bazelrc.
build-asan *args="//...":
just build {{args}} --config=asan --sandbox_debug

# Run `bazel test` on the given targets/flags (default: `//...`) with the same
# CC/CXX overrides as `build`, and expose LLVM_SYMBOLIZER to the tests as
# llvm-symbolizer-{{clang_version}}.
test *args="//...":
bazel test {{args}} --action_env=CC=clang-{{clang_version}} --action_env=CXX=clang++-{{clang_version}} --test_env=LLVM_SYMBOLIZER=llvm-symbolizer-{{clang_version}}

# Delegate to the `test` recipe with `--config=asan` appended after the given
# targets/flags (default: `//...`).
test-asan *args="//...":
just test {{args}} --config=asan
3 changes: 3 additions & 0 deletions src/node/buffer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
SlowBuffer,
isAscii,
isUtf8,
transcode,
} from 'node-internal:internal_buffer';

// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
Expand All @@ -30,6 +31,7 @@ export {
SlowBuffer,
isAscii,
isUtf8,
transcode,
};

export default {
Expand All @@ -46,4 +48,5 @@ export default {
SlowBuffer,
isAscii,
isUtf8,
transcode,
};
1 change: 1 addition & 0 deletions src/node/internal/buffer.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,4 @@ export function decode(buffer: Uint8Array, state: Uint8Array): string;
export function flush(state: Uint8Array): string;
export function isAscii(value: ArrayBufferView): boolean;
export function isUtf8(value: ArrayBufferView): boolean;
/**
 * Native binding that converts the bytes of `source` from `fromEncoding` to
 * `toEncoding` and returns the converted bytes.
 *
 * NOTE(review): the encoding names are presumably expected in normalized form
 * (e.g. "utf8", "latin1", "utf16le") — confirm against the native implementation.
 */
export function transcode(source: ArrayBufferView, fromEncoding: string, toEncoding: string): ArrayBuffer;
2 changes: 1 addition & 1 deletion src/node/internal/crypto_dh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ let DiffieHellman = function (this: DiffieHellman, sizeOrKey: number|ArrayLike,
if (typeof sizeOrKey === 'number')
validateInt32(sizeOrKey, 'sizeOrKey');

if (keyEncoding && !Buffer.isEncoding(keyEncoding) && keyEncoding !== 'buffer') {
if (keyEncoding && keyEncoding !== 'buffer' && !Buffer.isEncoding(keyEncoding)) {
genEncoding = generator as any;
generator = keyEncoding;
keyEncoding = "utf-8"; // default encoding
Expand Down
18 changes: 17 additions & 1 deletion src/node/internal/internal_buffer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,7 @@ export function compare(a: Buffer|Uint8Array, b: Buffer|Uint8Array) {

Buffer.compare = compare;

export function isEncoding(encoding: unknown) {
export function isEncoding(encoding: unknown): encoding is string {
return typeof encoding === "string" &&
encoding.length !== 0 &&
normalizeEncoding(encoding) !== undefined;
Expand Down Expand Up @@ -2294,6 +2294,22 @@ export function isUtf8(value: ArrayBufferView) {
return bufferUtil.isUtf8(value);
}

/**
 * Re-encodes the bytes of `source` from one character encoding to another.
 *
 * Both encoding names are normalized before being validated and handed to the
 * native `bufferUtil.transcode` implementation.
 *
 * @param source - View over the bytes to convert.
 * @param fromEncoding - Name of the encoding `source` is currently in.
 * @param toEncoding - Name of the encoding to convert to.
 * @returns `Buffer.from()` applied to the bytes produced by the native transcoder.
 * @throws ERR_INVALID_ARG_TYPE if `source` is not an ArrayBufferView.
 * @throws ERR_UNKNOWN_ENCODING if either encoding name is not recognized; the
 *   error carries the name exactly as the caller supplied it.
 */
export function transcode(source: ArrayBufferView, fromEncoding: string, toEncoding: string) {
  if (!isArrayBufferView(source)) {
    // Pass the received value itself as the `actual` argument (Node.js
    // convention: name, expected, actual). `typeof source` is always a string,
    // so passing it would make the error describe a string rather than the
    // offending argument.
    throw new ERR_INVALID_ARG_TYPE('source', 'ArrayBufferView', source);
  }

  const normalizedFromEncoding = normalizeEncoding(fromEncoding);
  // isEncoding() doubles as a type guard, narrowing the normalized value to string.
  if (!Buffer.isEncoding(normalizedFromEncoding)) {
    throw new ERR_UNKNOWN_ENCODING(fromEncoding);
  }

  const normalizedToEncoding = normalizeEncoding(toEncoding);
  if (!Buffer.isEncoding(normalizedToEncoding)) {
    throw new ERR_UNKNOWN_ENCODING(toEncoding);
  }

  // TODO(soon): Optimization opportunity: Pass int encoding values instead of strings.
  return Buffer.from(bufferUtil.transcode(source, normalizedFromEncoding, normalizedToEncoding));
}

export default {
Buffer,
constants,
Expand Down
55 changes: 31 additions & 24 deletions src/workerd/api/node/buffer.c++
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@
#include "buffer-string-search.h"
#include <workerd/jsg/buffersource.h>
#include <kj/encoding.h>
#include <algorithm>
#include <kj/array.h>
#include "simdutf.h"
#include "i18n.h"

#include <algorithm>

// These are defined by <sys/byteorder.h> or <netinet/in.h> on some systems.
// To avoid warnings, undefine them before redefining them.
Expand Down Expand Up @@ -85,34 +88,24 @@ void SwapBytes(kj::ArrayPtr<kj::byte> bytes) {
}
}

enum class Encoding {
ASCII,
LATIN1,
UTF8,
UTF16LE,
BASE64,
BASE64URL,
HEX,
};

Encoding getEncoding(kj::StringPtr encoding) {
if (encoding == "utf8"_kj) {
inline Encoding getEncoding(kj::StringPtr input) {
if (input == "utf8"_kj) {
return Encoding::UTF8;
} else if (encoding == "ascii") {
} else if (input == "ascii"_kj) {
return Encoding::ASCII;
} else if (encoding == "latin1") {
} else if (input == "latin1"_kj) {
return Encoding::LATIN1;
} else if (encoding == "utf16le") {
} else if (input == "utf16le"_kj) {
return Encoding::UTF16LE;
} else if (encoding == "base64") {
} else if (input == "base64"_kj) {
return Encoding::BASE64;
} else if (encoding == "base64url") {
} else if (input == "base64url"_kj) {
return Encoding::BASE64URL;
} else if (encoding == "hex") {
} else if (input == "hex"_kj) {
return Encoding::HEX;
}

anonrig marked this conversation as resolved.
Show resolved Hide resolved
KJ_UNREACHABLE;
JSG_FAIL_REQUIRE(Error, kj::str("Invalid encoding: ", input));
}

kj::Maybe<uint> tryFromHexDigit(char c) {
Expand All @@ -137,7 +130,7 @@ kj::Array<byte> decodeHexTruncated(kj::ArrayPtr<kj::byte> text, bool strict = fa
}
text = text.slice(0, text.size() - 1);
}
kj::Vector vec = kj::Vector<kj::byte>(text.size() / 2);
auto vec = kj::Vector<kj::byte>(text.size() / 2);

for (size_t i = 0; i < text.size(); i += 2) {
byte b = 0;
Expand Down Expand Up @@ -216,8 +209,9 @@ uint32_t writeInto(
dest.first(amountToCopy).copyFrom(bytes.first(amountToCopy));
return amountToCopy;
}
default:
KJ_UNREACHABLE;
}
KJ_UNREACHABLE;
}

kj::Array<kj::byte> decodeStringImpl(
Expand Down Expand Up @@ -272,8 +266,9 @@ kj::Array<kj::byte> decodeStringImpl(
string.writeInto(js, buf, options);
return decodeHexTruncated(buf, strict);
}
default:
KJ_UNREACHABLE;
}
KJ_UNREACHABLE;
}
} // namespace

Expand Down Expand Up @@ -561,8 +556,9 @@ jsg::JsString toStringImpl(
case Encoding::HEX: {
return js.str(kj::encodeHex(slice));
}
default:
KJ_UNREACHABLE;
}
KJ_UNREACHABLE;
}

} // namespace
Expand Down Expand Up @@ -876,5 +872,16 @@ bool BufferUtil::isUtf8(kj::Array<kj::byte> buffer) {
return simdutf::validate_utf8(buffer.asChars().begin(), buffer.size());
}

// Converts `source` from the encoding named by `rawFromEncoding` to the one
// named by `rawToEncoding`. Both names are resolved with getEncoding() before
// any support check; the call fails with a JS Error unless i18n reports that
// both encodings can be transcoded.
kj::Array<kj::byte> BufferUtil::transcode(kj::Array<kj::byte> source, kj::String rawFromEncoding, kj::String rawToEncoding) {
  const auto from = getEncoding(rawFromEncoding);
  const auto to = getEncoding(rawToEncoding);

  // Short-circuits exactly like the original: `to` is only queried when `from` is supported.
  const bool bothSupported = i18n::canBeTranscoded(from) && i18n::canBeTranscoded(to);
  JSG_REQUIRE(bothSupported, Error, "Unable to transcode buffer due to unsupported encoding");

  return i18n::transcode(source, from, to);
}

} // namespace workerd::api::node {

4 changes: 4 additions & 0 deletions src/workerd/api/node/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ class BufferUtil final: public jsg::Object {
jsg::JsString flush(jsg::Lock& js, kj::Array<kj::byte> state);
bool isAscii(kj::Array<kj::byte> bytes);
bool isUtf8(kj::Array<kj::byte> bytes);
// Transcodes the bytes in `source` from the encoding named by `rawFromEncoding`
// to the one named by `rawToEncoding`, returning the converted bytes.
// The encodings are passed by name as strings.
kj::Array<kj::byte> transcode(kj::Array<kj::byte> source,
kj::String rawFromEncoding,
kj::String rawToEncoding);

JSG_RESOURCE_TYPE(BufferUtil) {
JSG_METHOD(byteLength);
Expand All @@ -94,6 +97,7 @@ class BufferUtil final: public jsg::Object {
JSG_METHOD(write);
JSG_METHOD(isAscii);
JSG_METHOD(isUtf8);
JSG_METHOD(transcode);

// For StringDecoder
JSG_METHOD(decode);
Expand Down
Loading
Loading