Skip to content

Commit

Permalink
util: graduate TextEncoder/TextDecoder, tests
Browse files Browse the repository at this point in the history
Add tests ported from Web Platform Tests.

Graduate TextEncoder / TextDecoder from experimental

PR-URL: #15743
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: Refael Ackermann <refack@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
Reviewed-By: Timothy Gu <timothygu99@gmail.com>
  • Loading branch information
jasnell authored and gibfahn committed Oct 31, 2017
1 parent f00ba6b commit 8fd75fb
Show file tree
Hide file tree
Showing 12 changed files with 563 additions and 28 deletions.
4 changes: 0 additions & 4 deletions doc/api/util.md
Original file line number Diff line number Diff line change
Expand Up @@ -551,8 +551,6 @@ see [Custom promisified functions][].
added: v8.3.0
-->

> Stability: 1 - Experimental
An implementation of the [WHATWG Encoding Standard][] `TextDecoder` API.

```js
Expand Down Expand Up @@ -690,8 +688,6 @@ mark.
added: v8.3.0
-->

> Stability: 1 - Experimental
An implementation of the [WHATWG Encoding Standard][] `TextEncoder` API. All
instances of `TextEncoder` only support UTF-8 encoding.

Expand Down
20 changes: 0 additions & 20 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,6 @@ const kEncoding = Symbol('encoding');
const kDecoder = Symbol('decoder');
const kEncoder = Symbol('encoder');

let warned = false;
const experimental =
'The WHATWG Encoding Standard implementation is an experimental API. It ' +
'should not yet be used in production applications.';

const {
getConstructorOf,
customInspectSymbol: inspect
Expand Down Expand Up @@ -289,11 +284,6 @@ function getEncodingFromLabel(label) {

class TextEncoder {
constructor() {
if (!warned) {
warned = true;
process.emitWarning(experimental, 'ExperimentalWarning');
}

this[kEncoder] = true;
}

Expand Down Expand Up @@ -353,11 +343,6 @@ function makeTextDecoderICU() {

class TextDecoder {
constructor(encoding = 'utf-8', options = {}) {
if (!warned) {
warned = true;
process.emitWarning(experimental, 'ExperimentalWarning');
}

encoding = `${encoding}`;
if (typeof options !== 'object')
throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object');
Expand Down Expand Up @@ -430,11 +415,6 @@ function makeTextDecoderJS() {

class TextDecoder {
constructor(encoding = 'utf-8', options = {}) {
if (!warned) {
warned = true;
process.emitWarning(experimental, 'ExperimentalWarning');
}

encoding = `${encoding}`;
if (typeof options !== 'object')
throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object');
Expand Down
76 changes: 76 additions & 0 deletions test/parallel/test-whatwg-encoding-fatal-streaming.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
'use strict';

// From: https://github.com/w3c/web-platform-tests/blob/d74324b53c/encoding/textdecoder-fatal-streaming.html

const common = require('../common');

if (!common.hasIntl)
common.skip('missing Intl');

const assert = require('assert');
const {
TextDecoder
} = require('util');


// Feeds each encoding a single byte that cannot be decoded on its own.
// A decoder created with `fatal: true` is expected to fail with
// ERR_ENCODING_INVALID_ENCODED_DATA; a default decoder is expected to
// produce U+FFFD for the same input.
{
  const invalidChunks = [
    { encoding: 'utf-8', sequence: [0xC0] },
    { encoding: 'utf-16le', sequence: [0x00] },
    { encoding: 'utf-16be', sequence: [0x00] }
  ];

  invalidChunks.forEach((testCase) => {
    // Hand the byte list straight to the typed array. Wrapping it in another
    // array only works by accident for one-element sequences (the inner
    // array is coerced to a number through its string form); a longer
    // sequence would silently collapse into a single 0 byte.
    const data = new Uint8Array(testCase.sequence);

    common.expectsError(
      () => {
        const decoder = new TextDecoder(testCase.encoding, { fatal: true });
        decoder.decode(data);
      }, {
        code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
        type: TypeError,
        message:
          `The encoded data was not valid for encoding ${testCase.encoding}`
      }
    );

    // Same bytes, non-fatal decoder: the result is the replacement character.
    assert.strictEqual(
      new TextDecoder(testCase.encoding).decode(data),
      '\uFFFD'
    );
  });
}

// Exercises one fatal utf-16le decoder with chunks of one byte and of two
// bytes, in streaming and non-streaming calls. The decoder instance is
// shared across all steps, so the order of the calls below matters.
{
  const decoder = new TextDecoder('utf-16le', { fatal: true });
  const oneByte = new Uint8Array([0x00]);
  const twoBytes = new Uint8Array([0x00, 0x00]);

  // Runs `fn` and expects it to fail with the utf-16le invalid-data error.
  const expectInvalidData = (fn) => {
    common.expectsError(fn, {
      code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
      type: TypeError,
      message:
        'The encoded data was not valid for encoding utf-16le'
    });
  };

  // Two one-byte chunks: the first yields nothing, the second yields U+0000.
  assert.strictEqual(decoder.decode(oneByte, { stream: true }), '');
  assert.strictEqual(decoder.decode(oneByte), '\u0000');

  // Two bytes streamed, then a final single byte.
  expectInvalidData(() => {
    decoder.decode(twoBytes, { stream: true });
    decoder.decode(oneByte);
  });

  // One byte streamed, then a final two-byte chunk.
  expectInvalidData(() => {
    decoder.decode(oneByte, { stream: true });
    decoder.decode(twoBytes);
  });

  // Two-byte chunks decode to U+0000 both when streaming and when final.
  assert.strictEqual(decoder.decode(twoBytes, { stream: true }), '\u0000');
  assert.strictEqual(decoder.decode(twoBytes), '\u0000');
}
1 change: 1 addition & 0 deletions test/parallel/test-whatwg-encoding-internals.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
'use strict';

require('../common');

const assert = require('assert');
const { getEncodingFromLabel } = require('internal/encoding');

Expand Down
56 changes: 56 additions & 0 deletions test/parallel/test-whatwg-encoding-surrogates-utf8.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
'use strict';

// From: https://github.com/w3c/web-platform-tests/blob/fa9436d12c/encoding/api-surrogates-utf8.html

require('../common');

const assert = require('assert');
const {
TextDecoder,
TextEncoder
} = require('util');

// Strings (mostly containing unpaired surrogates) together with the UTF-8
// bytes TextEncoder should produce for them and the string obtained when
// those bytes are decoded again as UTF-8.
// NOTE(review): U+D800 lies in the high (lead) surrogate range and U+DC00 in
// the low (trail) range, so the low/high wording in `name` looks swapped
// relative to Unicode terminology. The labels are not read by the loop
// below and are kept verbatim.
const badStrings = [
  {
    name: 'Sanity check',
    input: 'abc123',
    expected: [0x61, 0x62, 0x63, 0x31, 0x32, 0x33],
    decoded: 'abc123'
  },
  {
    name: 'Surrogate half (low)',
    input: '\uD800',
    expected: [0xef, 0xbf, 0xbd],
    decoded: '\uFFFD'
  },
  {
    name: 'Surrogate half (high)',
    input: '\uDC00',
    expected: [0xef, 0xbf, 0xbd],
    decoded: '\uFFFD'
  },
  {
    name: 'Surrogate half (low), in a string',
    input: 'abc\uD800123',
    expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
    decoded: 'abc\uFFFD123'
  },
  {
    name: 'Surrogate half (high), in a string',
    input: 'abc\uDC00123',
    expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
    decoded: 'abc\uFFFD123'
  },
  {
    name: 'Wrong order',
    input: '\uDC00\uD800',
    expected: [0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd],
    decoded: '\uFFFD\uFFFD'
  }
];

// Round-trip every case: encode, compare the bytes, decode, compare the text.
for (const { input, expected, decoded } of badStrings) {
  const utf8Bytes = new TextEncoder().encode(input);
  // Copy into a plain array so the comparison is against a plain array.
  assert.deepStrictEqual(Array.from(utf8Bytes), expected);
  assert.strictEqual(new TextDecoder('utf-8').decode(utf8Bytes), decoded);
}
93 changes: 93 additions & 0 deletions test/parallel/test-whatwg-encoding-textdecoder-fatal.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
'use strict';

// From: https://github.com/w3c/web-platform-tests/blob/39a67e2fff/encoding/textdecoder-fatal.html

const common = require('../common');

if (!common.hasIntl)
common.skip('missing Intl');

const assert = require('assert');
const {
TextDecoder
} = require('util');

// Table of byte sequences used as input for a fatal TextDecoder.
// The UTF-8 cases are written as [name, bytes] pairs and expanded into
// { encoding, input, name } records; the single utf-16le case is appended.
const bad = [
  ['invalid code', [0xFF]],
  ['ends early', [0xC0]],
  ['ends early 2', [0xE0]],
  ['invalid trail', [0xC0, 0x00]],
  ['invalid trail 2', [0xC0, 0xC0]],
  ['invalid trail 3', [0xE0, 0x00]],
  ['invalid trail 4', [0xE0, 0xC0]],
  ['invalid trail 5', [0xE0, 0x80, 0x00]],
  ['invalid trail 6', [0xE0, 0x80, 0xC0]],
  ['> 0x10FFFF', [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80]],
  ['obsolete lead byte', [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80]],
  // Overlong encodings
  ['overlong U+0000 - 2 bytes', [0xC0, 0x80]],
  ['overlong U+0000 - 3 bytes', [0xE0, 0x80, 0x80]],
  ['overlong U+0000 - 4 bytes', [0xF0, 0x80, 0x80, 0x80]],
  ['overlong U+0000 - 5 bytes', [0xF8, 0x80, 0x80, 0x80, 0x80]],
  ['overlong U+0000 - 6 bytes', [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80]],
  ['overlong U+007F - 2 bytes', [0xC1, 0xBF]],
  ['overlong U+007F - 3 bytes', [0xE0, 0x81, 0xBF]],
  ['overlong U+007F - 4 bytes', [0xF0, 0x80, 0x81, 0xBF]],
  ['overlong U+007F - 5 bytes', [0xF8, 0x80, 0x80, 0x81, 0xBF]],
  ['overlong U+007F - 6 bytes', [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF]],
  ['overlong U+07FF - 3 bytes', [0xE0, 0x9F, 0xBF]],
  ['overlong U+07FF - 4 bytes', [0xF0, 0x80, 0x9F, 0xBF]],
  ['overlong U+07FF - 5 bytes', [0xF8, 0x80, 0x80, 0x9F, 0xBF]],
  ['overlong U+07FF - 6 bytes', [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF]],
  ['overlong U+FFFF - 4 bytes', [0xF0, 0x8F, 0xBF, 0xBF]],
  ['overlong U+FFFF - 5 bytes', [0xF8, 0x80, 0x8F, 0xBF, 0xBF]],
  ['overlong U+FFFF - 6 bytes', [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF]],
  ['overlong U+10FFFF - 5 bytes', [0xF8, 0x84, 0x8F, 0xBF, 0xBF]],
  ['overlong U+10FFFF - 6 bytes', [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF]],
  // UTF-16 surrogates encoded as code points in UTF-8
  ['lead surrogate', [0xED, 0xA0, 0x80]],
  ['trail surrogate', [0xED, 0xB0, 0x80]],
  ['surrogate pair', [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80]]
].map(([name, input]) => ({ encoding: 'utf-8', input, name })).concat([
  { encoding: 'utf-16le', input: [0x00], name: 'truncated code unit' }
  // Mismatched UTF-16 surrogates are exercised in utf16-surrogates.html
  // FIXME: Add legacy encoding cases
]);

// Decode every entry of `bad` with a fatal decoder for its encoding; each
// attempt is expected to fail with a TypeError whose code is
// ERR_ENCODING_INVALID_ENCODED_DATA.
for (const { encoding, input } of bad) {
  const decodeFatally = () => {
    const decoder = new TextDecoder(encoding, { fatal: true });
    decoder.decode(new Uint8Array(input));
  };

  common.expectsError(decodeFatally, {
    code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
    type: TypeError
  });
}

// The `fatal` attribute exists, is a boolean, is falsy by default and is
// truthy when the decoder was constructed with `fatal: true`.
{
  const defaultDecoder = new TextDecoder();
  assert('fatal' in defaultDecoder);

  const typeOfFatal = typeof new TextDecoder().fatal;
  assert.strictEqual(typeOfFatal, 'boolean');

  const defaultFatal = new TextDecoder().fatal;
  assert(!defaultFatal);

  const fatalDecoder = new TextDecoder('utf-8', { fatal: true });
  assert(fatalDecoder.fatal);
}
50 changes: 50 additions & 0 deletions test/parallel/test-whatwg-encoding-textdecoder-ignorebom.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
'use strict';

// From: https://github.com/w3c/web-platform-tests/blob/7f567fa29c/encoding/textdecoder-ignorebom.html

const common = require('../common');

const assert = require('assert');
const {
TextDecoder
} = require('util');

// Encoded forms of "abc" preceded by a byte order mark, one per encoding.
// `skipNoIntl` marks cases that the loop over `cases` skips when
// `common.hasIntl` is false.
const cases = [
  {
    // EF BB BF, then 'a' 'b' 'c'
    encoding: 'utf-8',
    skipNoIntl: false,
    bytes: [0xEF, 0xBB, 0xBF, 0x61, 0x62, 0x63]
  },
  {
    // FF FE, then 'a' 'b' 'c' as two-byte units, low byte first
    encoding: 'utf-16le',
    skipNoIntl: false,
    bytes: [0xFF, 0xFE, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00]
  },
  {
    // FE FF, then 'a' 'b' 'c' as two-byte units, high byte first
    encoding: 'utf-16be',
    skipNoIntl: true,
    bytes: [0xFE, 0xFF, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63]
  }
];

// For each case, decode the same bytes three ways: with ignoreBOM: true the
// output starts with U+FEFF, with ignoreBOM: false or with no options the
// output is just 'abc'.
for (const { encoding, bytes: rawBytes, skipNoIntl } of cases) {
  if (skipNoIntl && !common.hasIntl) {
    console.log(`skipping ${encoding} because missing Intl`);
    continue; // skipping
  }

  const BOM = '\uFEFF';
  const keepBom = new TextDecoder(encoding, { ignoreBOM: true });
  const bytes = new Uint8Array(rawBytes);
  assert.strictEqual(keepBom.decode(bytes), `${BOM}abc`);

  const stripBom = new TextDecoder(encoding, { ignoreBOM: false });
  assert.strictEqual(stripBom.decode(bytes), 'abc');

  const defaults = new TextDecoder(encoding);
  assert.strictEqual(defaults.decode(bytes), 'abc');
}

// The `ignoreBOM` attribute exists, is a boolean, is falsy by default and is
// truthy when the decoder was constructed with `ignoreBOM: true`.
{
  const defaultDecoder = new TextDecoder();
  assert('ignoreBOM' in defaultDecoder);

  const typeOfIgnoreBom = typeof new TextDecoder().ignoreBOM;
  assert.strictEqual(typeOfIgnoreBom, 'boolean');

  const defaultIgnoreBom = new TextDecoder().ignoreBOM;
  assert(!defaultIgnoreBom);

  const ignoringDecoder = new TextDecoder('utf-8', { ignoreBOM: true });
  assert(ignoringDecoder.ignoreBOM);
}
Loading

0 comments on commit 8fd75fb

Please sign in to comment.