-
Notifications
You must be signed in to change notification settings - Fork 29.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
229 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# Encoding | ||
|
||
<!-- introduced_in=REPLACEME --> | ||
|
||
> Stability: 1 - Experimental | ||
<!-- source_link=lib/encoding.js --> | ||
|
||
The `node:encoding` module provides Unicode validation and transcoding. | ||
To access it: | ||
|
||
```mjs | ||
import encoding from 'node:encoding'; | ||
``` | ||
|
||
```cjs | ||
const encoding = require('node:encoding'); | ||
``` | ||
|
||
This module is only available under the `node:` scheme. The following will not | ||
work: | ||
|
||
```mjs | ||
import encoding from 'encoding'; | ||
``` | ||
|
||
```cjs | ||
const encoding = require('encoding'); | ||
``` | ||
|
||
## `isAscii(input)` | ||
|
||
<!-- YAML | ||
added: REPLACEME | ||
--> | ||
|
||
* `input` {Buffer|Uint8Array|string} The input to validate. | ||
* Returns: {boolean} `true` if and only if `input` is valid ASCII. | ||
|
||
Checks whether `input` consists solely of ASCII characters. Strings are | ||
UTF-8 encoded before validation. Any other type of input returns `false`. | ||
|
||
## `isUtf8(input)` | ||
|
||
<!-- YAML | ||
added: REPLACEME | ||
--> | ||
|
||
* `input` {Buffer|Uint8Array} The input to validate. | ||
* Returns: {boolean} `true` if and only if `input` is valid UTF-8. | ||
|
||
Checks whether the bytes of `input` form a valid UTF-8 sequence. Any other | ||
type of input, including a string, returns `false`. | ||
|
||
## `countUtf8(input)` | ||
|
||
<!-- YAML | ||
added: REPLACEME | ||
--> | ||
|
||
* `input` {Buffer|Uint8Array} The UTF-8 encoded input. | ||
* Returns: {number} The number of code points in `input`. | ||
|
||
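Counts the number of code points (characters) in `input`, assuming that it is | ||
valid UTF-8. The input is not validated by this function; call `isUtf8()` | ||
first if it may be malformed. Any other type of input returns `0`. |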
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
'use strict'; | ||
|
||
const { | ||
isAscii: _isAscii, | ||
isUtf8: _isUtf8, | ||
countUtf8: _countUtf8, | ||
} = internalBinding('encoding_methods'); | ||
|
||
const { | ||
isUint8Array, | ||
} = require('internal/util/types'); | ||
|
||
const { | ||
emitExperimentalWarning, | ||
} = require('internal/util'); | ||
|
||
const { TextEncoder } = require('util'); | ||
const { Buffer } = require('buffer'); | ||
|
||
const encoder = new TextEncoder(); | ||
|
||
emitExperimentalWarning('Encoding'); | ||
|
||
/**
 * Checks whether the input consists solely of ASCII bytes.
 *
 * Strings are UTF-8 encoded first and the resulting bytes are validated.
 *
 * @param {Buffer | Uint8Array | string} input Value to validate.
 * @returns {boolean} The native validation result for the input's bytes, or
 *   `false` when `input` is none of the accepted types.
 */
function isAscii(input) {
  let view;
  if (typeof input === 'string') {
    view = encoder.encode(input);
  } else if (Buffer.isBuffer(input) || isUint8Array(input)) {
    view = input;
  } else {
    return false;
  }

  // The native binding takes an ArrayBuffer and validates all of it. A view
  // may cover only part of its backing store (subarray(), pooled Buffers),
  // so hand over just the bytes the view actually spans.
  const { buffer, byteOffset, byteLength } = view;
  const bytes = byteLength === buffer.byteLength ?
    buffer :
    buffer.slice(byteOffset, byteOffset + byteLength);
  return _isAscii(bytes);
}
|
||
/**
 * Checks whether the bytes of the input form valid UTF-8.
 *
 * @param {Buffer | Uint8Array} input Bytes to validate.
 * @returns {boolean} The native validation result for the input's bytes, or
 *   `false` when `input` is neither a `Buffer` nor a `Uint8Array`.
 */
function isUtf8(input) {
  if (!Buffer.isBuffer(input) && !isUint8Array(input)) {
    return false;
  }

  // The native binding takes an ArrayBuffer and validates all of it. A view
  // may cover only part of its backing store (subarray(), pooled Buffers),
  // so hand over just the bytes the view actually spans.
  const { buffer, byteOffset, byteLength } = input;
  const bytes = byteLength === buffer.byteLength ?
    buffer :
    buffer.slice(byteOffset, byteOffset + byteLength);
  return _isUtf8(bytes);
}
|
||
/**
 * Counts the code points in UTF-8 encoded bytes.
 *
 * The bytes are not validated here; the count is only meaningful for input
 * that is already known to be valid UTF-8.
 *
 * @param {Buffer | Uint8Array} input UTF-8 encoded bytes.
 * @returns {number} The native code point count for the input's bytes, or `0`
 *   when `input` is neither a `Buffer` nor a `Uint8Array`.
 */
function countUtf8(input) {
  if (!Buffer.isBuffer(input) && !isUint8Array(input)) {
    return 0;
  }

  // The native binding takes an ArrayBuffer and counts across all of it. A
  // view may cover only part of its backing store (subarray(), pooled
  // Buffers), so hand over just the bytes the view actually spans.
  const { buffer, byteOffset, byteLength } = input;
  const bytes = byteLength === buffer.byteLength ?
    buffer :
    buffer.slice(byteOffset, byteOffset + byteLength);
  return _countUtf8(bytes);
}
|
||
module.exports = { | ||
isAscii, | ||
isUtf8, | ||
countUtf8, | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#include "env-inl.h" | ||
#include "node.h" | ||
#include "node_errors.h" | ||
#include "node_external_reference.h" | ||
#include "util-inl.h" | ||
|
||
#include "simdutf.h" | ||
|
||
namespace node { | ||
|
||
using v8::ArrayBuffer; | ||
using v8::BackingStore; | ||
using v8::CFunction; | ||
using v8::Context; | ||
using v8::FastApiTypedArray; | ||
using v8::FunctionCallbackInfo; | ||
using v8::Isolate; | ||
using v8::Local; | ||
using v8::MaybeLocal; | ||
using v8::Object; | ||
using v8::String; | ||
using v8::Uint32Array; | ||
using v8::Uint8Array; | ||
using v8::Value; | ||
|
||
// TODO(anonrig): Replace this with encoding when encoding enum is renamed. | ||
namespace encoding_methods { | ||
|
||
static void IsAscii(const FunctionCallbackInfo<Value>& args) { | ||
CHECK_GE(args.Length(), 1); | ||
CHECK(args[0]->IsArrayBuffer()); | ||
Local<ArrayBuffer> input = args[0].As<ArrayBuffer>(); | ||
auto external_resource = static_cast<const char*>(input->Data()); | ||
args.GetReturnValue().Set( | ||
simdutf::validate_ascii(external_resource, input->ByteLength())); | ||
} | ||
|
||
static void IsUtf8(const FunctionCallbackInfo<Value>& args) { | ||
CHECK_GE(args.Length(), 1); | ||
CHECK(args[0]->IsArrayBuffer()); | ||
Local<ArrayBuffer> input = args[0].As<ArrayBuffer>(); | ||
auto external_resource = static_cast<const char*>(input->Data()); | ||
args.GetReturnValue().Set( | ||
simdutf::validate_utf8(external_resource, input->ByteLength())); | ||
} | ||
|
||
static void CountUtf8(const FunctionCallbackInfo<Value>& args) { | ||
CHECK_GE(args.Length(), 1); | ||
CHECK(args[0]->IsArrayBuffer()); | ||
Local<ArrayBuffer> input = args[0].As<ArrayBuffer>(); | ||
auto external_resource = static_cast<const char*>(input->Data()); | ||
int count = simdutf::count_utf8(external_resource, input->ByteLength()); | ||
args.GetReturnValue().Set(count); | ||
} | ||
|
||
// Binding initializer: attaches the three native callbacks to `target` under
// the property names "isAscii", "isUtf8" and "countUtf8". The `unused` and
// `priv` parameters are not referenced in the body.
static void Initialize(Local<Object> target, | ||
Local<Value> unused, | ||
Local<Context> context, | ||
void* priv) { | ||
SetMethodNoSideEffect(context, target, "isAscii", IsAscii); | ||
SetMethodNoSideEffect(context, target, "isUtf8", IsUtf8); | ||
SetMethodNoSideEffect(context, target, "countUtf8", CountUtf8); | ||
} | ||
|
||
// Registers the same three callbacks that Initialize exposes with `registry`.
// NOTE(review): presumably required so these function addresses can be
// resolved when a startup snapshot is used — confirm against
// node_external_reference.h.
void RegisterExternalReferences(ExternalReferenceRegistry* registry) { | ||
registry->Register(IsAscii); | ||
registry->Register(IsUtf8); | ||
registry->Register(CountUtf8); | ||
} | ||
|
||
} // namespace encoding_methods | ||
} // namespace node | ||
|
||
NODE_BINDING_CONTEXT_AWARE_INTERNAL(encoding_methods, | ||
node::encoding_methods::Initialize) | ||
NODE_BINDING_EXTERNAL_REFERENCE( | ||
encoding_methods, node::encoding_methods::RegisterExternalReferences) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
// Flags: --no-warnings | ||
'use strict'; | ||
require('../common'); | ||
|
||
const assert = require('assert'); | ||
const encoding = require('node:encoding'); | ||
const { TextEncoder } = require('util'); | ||
|
||
const encoder = new TextEncoder(); | ||
|
||
// isAscii(): accepts both byte views and strings.
const asciiCases = [
  [encoder.encode('hello'), true],
  [encoder.encode('ğ'), false],
  ['hello', true],
  ['ğ', false],
];
for (const [input, expected] of asciiCases) {
  assert.strictEqual(encoding.isAscii(input), expected);
}

// isUtf8(): accepts byte views only; 0xf8 can never appear in valid UTF-8.
const utf8Cases = [
  [encoder.encode('hello'), true],
  [encoder.encode('ğ'), true],
  [Buffer.from([0xf8]), false],
];
for (const [input, expected] of utf8Cases) {
  assert.strictEqual(encoding.isUtf8(input), expected);
}

// countUtf8(): counts code points, not bytes ('Yağız' is 7 bytes, 5 chars).
const countCases = [
  [encoder.encode('hello'), 5],
  [encoder.encode('Yağız'), 5],
];
for (const [input, expected] of countCases) {
  assert.strictEqual(encoding.countUtf8(input), expected);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
// Typings for the native `encoding_methods` internal binding. Method names
// match the properties the binding registers, and each method takes a whole
// ArrayBuffer because the native callbacks assert `IsArrayBuffer()`.
declare function InternalBinding(binding: 'encoding_methods'): {
  isAscii(input: ArrayBuffer): boolean;
  isUtf8(input: ArrayBuffer): boolean;
  countUtf8(input: ArrayBuffer): number;
};