Skip to content

Commit

Permalink
src: add node:encoding module
Browse files Browse the repository at this point in the history
  • Loading branch information
anonrig committed Dec 15, 2022
1 parent b2d4c4e commit 8df4bbf
Show file tree
Hide file tree
Showing 10 changed files with 229 additions and 0 deletions.
63 changes: 63 additions & 0 deletions doc/api/encoding.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Encoding

<!-- introduced_in=REPLACEME -->

> Stability: 1 - Experimental
<!-- source_link=lib/encoding.js -->

The `node:encoding` module provides Unicode validation and transcoding.
To access it:

```mjs
import encoding from 'node:encoding';
```

```cjs
const encoding = require('node:encoding');
```

This module is only available under the `node:` scheme. The following will not
work:

```mjs
import encoding from 'encoding';
```

```cjs
const encoding = require('encoding');
```

## `isAscii(input)`

<!-- YAML
added: REPLACEME
-->

* input {Buffer | Uint8Array | string} The ASCII input to validate.
* Returns: {boolean} Returns true if and only if the input is valid ASCII.

This function checks whether `input` consists exclusively of ASCII code points
(characters).

## `isUtf8(input)`

<!-- YAML
added: REPLACEME
-->

* input {Buffer | Uint8Array} The UTF8 input to validate.
* Returns: {boolean} Returns true if and only if the input is valid UTF8.

This function checks whether `input` is a well-formed UTF-8 byte sequence.

## `countUtf8(input)`

<!-- YAML
added: REPLACEME
-->

* input {Buffer | Uint8Array}
* Returns: {number}

This function is used to count the number of code points (characters) in the
input assuming that it is a valid UTF8 input.
1 change: 1 addition & 0 deletions doc/api/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
* [DNS](dns.md)
* [Domain](domain.md)
* [Errors](errors.md)
* [Encoding](encoding.md)
* [Events](events.md)
* [File system](fs.md)
* [Globals](globals.md)
Expand Down
57 changes: 57 additions & 0 deletions lib/encoding.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
'use strict';

const {
isAscii: _isAscii,
isUtf8: _isUtf8,
countUtf8: _countUtf8,
} = internalBinding('encoding_methods');

const {
isUint8Array,
} = require('internal/util/types');

const {
emitExperimentalWarning,
} = require('internal/util');

const { TextEncoder } = require('util');
const { Buffer } = require('buffer');

const encoder = new TextEncoder();

emitExperimentalWarning('Encoding');

/**
 * Checks whether the input consists solely of ASCII bytes.
 *
 * Strings are encoded to UTF-8 first; any other input type yields `false`.
 * @param {Buffer | Uint8Array | string} input The data to validate.
 * @returns {boolean} `true` if and only if every byte of the input is ASCII.
 */
function isAscii(input) {
  if (Buffer.isBuffer(input) || isUint8Array(input)) {
    const { buffer, byteOffset, byteLength } = input;
    // The native binding receives a bare ArrayBuffer and inspects all of it.
    // A view (a pooled Buffer, a subarray, ...) may expose only part of its
    // backing store, so hand over exactly the bytes the view covers.
    const coversWholeBuffer =
      byteOffset === 0 && byteLength === buffer.byteLength;
    return _isAscii(
      coversWholeBuffer ?
        buffer :
        buffer.slice(byteOffset, byteOffset + byteLength),
    );
  }

  if (typeof input === 'string') {
    const { buffer } = encoder.encode(input);
    return _isAscii(buffer);
  }

  return false;
}

/**
 * Checks whether the input is a valid UTF-8 byte sequence.
 *
 * Inputs that are neither a Buffer nor a Uint8Array yield `false`.
 * @param {Buffer | Uint8Array} input The bytes to validate.
 * @returns {boolean} `true` if and only if the input is valid UTF-8.
 */
function isUtf8(input) {
  if (Buffer.isBuffer(input) || isUint8Array(input)) {
    const { buffer, byteOffset, byteLength } = input;
    // The native binding receives a bare ArrayBuffer and inspects all of it.
    // A view (a pooled Buffer, a subarray, ...) may expose only part of its
    // backing store, so hand over exactly the bytes the view covers.
    const coversWholeBuffer =
      byteOffset === 0 && byteLength === buffer.byteLength;
    return _isUtf8(
      coversWholeBuffer ?
        buffer :
        buffer.slice(byteOffset, byteOffset + byteLength),
    );
  }

  return false;
}

/**
 * Counts the code points in a UTF-8 byte sequence.
 *
 * Inputs that are neither a Buffer nor a Uint8Array yield `0`.
 * @param {Buffer | Uint8Array} input Bytes assumed to be valid UTF-8.
 * @returns {number} The number of code points in the input.
 */
function countUtf8(input) {
  if (Buffer.isBuffer(input) || isUint8Array(input)) {
    const { buffer, byteOffset, byteLength } = input;
    // The native binding receives a bare ArrayBuffer and inspects all of it.
    // A view (a pooled Buffer, a subarray, ...) may expose only part of its
    // backing store, so hand over exactly the bytes the view covers.
    const coversWholeBuffer =
      byteOffset === 0 && byteLength === buffer.byteLength;
    return _countUtf8(
      coversWholeBuffer ?
        buffer :
        buffer.slice(byteOffset, byteOffset + byteLength),
    );
  }

  return 0;
}

module.exports = {
isAscii,
isUtf8,
countUtf8,
};
2 changes: 2 additions & 0 deletions lib/internal/bootstrap/loaders.js
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ const internalBindingAllowlist = new SafeSet([
'constants',
'contextify',
'crypto',
'encoding_methods',
'fs',
'fs_event_wrap',
'http_parser',
Expand Down Expand Up @@ -124,6 +125,7 @@ const legacyWrapperList = new SafeSet([

// Modules that can only be imported via the node: scheme.
const schemelessBlockList = new SafeSet([
'encoding',
'test',
]);

Expand Down
1 change: 1 addition & 0 deletions node.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,7 @@
'src/node_dir.cc',
'src/node_env_var.cc',
'src/node_errors.cc',
'src/node_encoding.cc',
'src/node_external_reference.cc',
'src/node_file.cc',
'src/node_http_parser.cc',
Expand Down
1 change: 1 addition & 0 deletions src/node_binding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
V(contextify) \
V(credentials) \
V(errors) \
V(encoding_methods) \
V(fs) \
V(fs_dir) \
V(fs_event_wrap) \
Expand Down
77 changes: 77 additions & 0 deletions src/node_encoding.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#include "env-inl.h"
#include "node.h"
#include "node_errors.h"
#include "node_external_reference.h"
#include "util-inl.h"

#include "simdutf.h"

namespace node {

using v8::ArrayBuffer;
using v8::BackingStore;
using v8::CFunction;
using v8::Context;
using v8::FastApiTypedArray;
using v8::FunctionCallbackInfo;
using v8::Isolate;
using v8::Local;
using v8::MaybeLocal;
using v8::Object;
using v8::String;
using v8::Uint32Array;
using v8::Uint8Array;
using v8::Value;

// TODO(anonrig): Replace this with encoding when encoding enum is renamed.
namespace encoding_methods {

static void IsAscii(const FunctionCallbackInfo<Value>& args) {
CHECK_GE(args.Length(), 1);
CHECK(args[0]->IsArrayBuffer());
Local<ArrayBuffer> input = args[0].As<ArrayBuffer>();
auto external_resource = static_cast<const char*>(input->Data());
args.GetReturnValue().Set(
simdutf::validate_ascii(external_resource, input->ByteLength()));
}

static void IsUtf8(const FunctionCallbackInfo<Value>& args) {
CHECK_GE(args.Length(), 1);
CHECK(args[0]->IsArrayBuffer());
Local<ArrayBuffer> input = args[0].As<ArrayBuffer>();
auto external_resource = static_cast<const char*>(input->Data());
args.GetReturnValue().Set(
simdutf::validate_utf8(external_resource, input->ByteLength()));
}

// JS-callable: counts the UTF-8 code points in the ArrayBuffer passed as
// args[0] and returns the count as a number. Aborts via CHECK if the first
// argument is missing or is not an ArrayBuffer.
static void CountUtf8(const FunctionCallbackInfo<Value>& args) {
  CHECK_GE(args.Length(), 1);
  CHECK(args[0]->IsArrayBuffer());
  Local<ArrayBuffer> input = args[0].As<ArrayBuffer>();
  auto external_resource = static_cast<const char*>(input->Data());
  // count_utf8() reports a size_t. Storing it in an int could truncate the
  // count for very large buffers, so pass it to V8 as a double instead.
  const size_t count =
      simdutf::count_utf8(external_resource, input->ByteLength());
  args.GetReturnValue().Set(static_cast<double>(count));
}

// Binding initializer: installs the three native methods on `target` as
// side-effect-free functions named isAscii, isUtf8 and countUtf8.
static void Initialize(Local<Object> target,
                       Local<Value> unused,
                       Local<Context> context,
                       void* priv) {
  struct MethodEntry {
    const char* name;
    v8::FunctionCallback callback;
  };

  // Kept in the same order the methods were originally installed.
  const MethodEntry methods[] = {
      {"isAscii", IsAscii},
      {"isUtf8", IsUtf8},
      {"countUtf8", CountUtf8},
  };

  for (const MethodEntry& method : methods) {
    SetMethodNoSideEffect(context, target, method.name, method.callback);
  }
}

// Registers every native callback exposed by this binding with the external
// reference registry, in the same order they are installed by Initialize().
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
  const v8::FunctionCallback callbacks[] = {IsAscii, IsUtf8, CountUtf8};
  for (v8::FunctionCallback callback : callbacks) {
    registry->Register(callback);
  }
}

} // namespace encoding_methods
} // namespace node

// Hooks this file into the binding machinery under the name
// `encoding_methods` (the name lib/encoding.js passes to internalBinding()),
// with Initialize as its setup function.
NODE_BINDING_CONTEXT_AWARE_INTERNAL(encoding_methods,
node::encoding_methods::Initialize)
// Ties RegisterExternalReferences to the same binding name.
// NOTE(review): presumably required so the callbacks are known to the startup
// snapshot — confirm against node_external_reference.h.
NODE_BINDING_EXTERNAL_REFERENCE(
encoding_methods, node::encoding_methods::RegisterExternalReferences)
1 change: 1 addition & 0 deletions src/node_external_reference.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class ExternalReferenceRegistry {
V(credentials) \
V(env_var) \
V(errors) \
V(encoding_methods) \
V(fs) \
V(fs_dir) \
V(fs_event_wrap) \
Expand Down
21 changes: 21 additions & 0 deletions test/parallel/test-encoding.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Flags: --no-warnings
'use strict';
require('../common');

const assert = require('assert');
const encoding = require('node:encoding');
const { TextEncoder } = require('util');

const encoder = new TextEncoder();
const utf8 = (text) => encoder.encode(text);

// isAscii() accepts encoded bytes as well as plain strings.
assert.strictEqual(encoding.isAscii(utf8('hello')), true);
assert.strictEqual(encoding.isAscii(utf8('ğ')), false);
assert.strictEqual(encoding.isAscii('hello'), true);
assert.strictEqual(encoding.isAscii('ğ'), false);

// isUtf8() accepts well-formed sequences and rejects a lone 0xf8 byte.
assert.strictEqual(encoding.isUtf8(utf8('hello')), true);
assert.strictEqual(encoding.isUtf8(utf8('ğ')), true);
assert.strictEqual(encoding.isUtf8(Buffer.from([0xf8])), false);

// countUtf8() counts code points, not bytes.
assert.strictEqual(encoding.countUtf8(utf8('hello')), 5);
assert.strictEqual(encoding.countUtf8(utf8('Yağız')), 5);
5 changes: 5 additions & 0 deletions typings/internalBinding/encoding_methods.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Shape of internalBinding('encoding_methods'). The method names and types
// mirror what src/node_encoding.cc installs: each method takes an ArrayBuffer,
// and countUtf8 returns a code point count rather than a boolean.
declare function InternalBinding(binding: 'encoding_methods'): {
  isAscii(input: ArrayBuffer): boolean;
  isUtf8(input: ArrayBuffer): boolean;
  countUtf8(input: ArrayBuffer): number;
};

0 comments on commit 8df4bbf

Please sign in to comment.