From 3768409d59d595b856dc042729906dd3bd6cb753 Mon Sep 17 00:00:00 2001 From: Aditi Khare Date: Tue, 23 Apr 2024 17:58:15 -0400 Subject: [PATCH] fix(NODE-6123): utf8 validation is not strict enough --- .../require_vendor.mjs | 2 +- src/error.ts | 10 - src/parser/deserializer.ts | 14 +- src/test.ts | 9 - src/utils/node_byte_utils.ts | 26 +- src/utils/web_byte_utils.ts | 12 +- src/validate_utf8.ts | 89 +++---- test/node/byte_utils.test.ts | 250 +++++------------- 8 files changed, 108 insertions(+), 304 deletions(-) delete mode 100644 src/test.ts diff --git a/etc/rollup/rollup-plugin-require-vendor/require_vendor.mjs b/etc/rollup/rollup-plugin-require-vendor/require_vendor.mjs index bdfe9c11..b59fd572 100644 --- a/etc/rollup/rollup-plugin-require-vendor/require_vendor.mjs +++ b/etc/rollup/rollup-plugin-require-vendor/require_vendor.mjs @@ -15,7 +15,7 @@ export class RequireVendor { */ transform(code, id) { // TODO(NODE-4930) - if (!id.includes('web_byte_utils')) { + if (!id.includes('validate_utf8')) { return; } diff --git a/src/error.ts b/src/error.ts index 7dd101c5..ef5184a4 100644 --- a/src/error.ts +++ b/src/error.ts @@ -103,13 +103,3 @@ export class BSONOffsetError extends BSONError { this.offset = offset; } } - -export class BSONUTF8Error extends BSONError { - public get name(): 'BSONUTF8Error' { - return 'BSONUTF8Error'; - } - - constructor(message: string, options?: { cause?: unknown }) { - super(message, options); - } -} diff --git a/src/parser/deserializer.ts b/src/parser/deserializer.ts index ac278190..5f030291 100644 --- a/src/parser/deserializer.ts +++ b/src/parser/deserializer.ts @@ -16,7 +16,6 @@ import { BSONSymbol } from '../symbol'; import { Timestamp } from '../timestamp'; import { ByteUtils } from '../utils/byte_utils'; import { NumberUtils } from '../utils/number_utils'; -import { validateUtf8 } from '../validate_utf8'; /** @public */ export interface DeserializeOptions { @@ -603,13 +602,12 @@ function deserializeObject( buffer[index + stringSize - 
1] !== 0 ) throw new BSONError('bad string length in bson'); - // Namespace - if (validation != null && validation.utf8) { - if (!validateUtf8(buffer, index, index + stringSize - 1)) { - throw new BSONError('Invalid UTF-8 string in BSON document'); - } - } - const namespace = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, false); + const namespace = ByteUtils.toUTF8( + buffer, + index, + index + stringSize - 1, + validation != null && (validation.utf8 as boolean) + ); // Update parse index position index = index + stringSize; diff --git a/src/test.ts b/src/test.ts deleted file mode 100644 index 2ef044cf..00000000 --- a/src/test.ts +++ /dev/null @@ -1,9 +0,0 @@ -function parseUtf8Bits(arr: number[]): number { - arr[0] >>= (arr.length - 1); - for (let i = 1; i < arr.length; i++) { - arr[i] >>= 2; - arr[i] <<= i*8; - arr[0] = arr[0] | arr[i] - } - return arr[0]; -} \ No newline at end of file diff --git a/src/utils/node_byte_utils.ts b/src/utils/node_byte_utils.ts index 8603b45e..d9487046 100644 --- a/src/utils/node_byte_utils.ts +++ b/src/utils/node_byte_utils.ts @@ -1,4 +1,4 @@ -import { BSONError, BSONUTF8Error } from '../error'; +import { BSONError } from '../error'; import { validateUtf8 } from '../validate_utf8'; import { tryReadBasicLatin, tryWriteBasicLatin } from './latin'; @@ -27,28 +27,6 @@ type NodeJsBufferConstructor = Omit & { declare const Buffer: NodeJsBufferConstructor; declare const require: (mod: 'crypto') => { randomBytes: (byteLength: number) => Uint8Array }; -type TextDecoder = { - readonly encoding: string; - readonly fatal: boolean; - readonly ignoreBOM: boolean; - decode(input?: Uint8Array): string; -}; -type TextDecoderConstructor = { - new (label: 'utf8', options: { fatal: boolean; ignoreBOM?: boolean }): TextDecoder; -}; - -type TextEncoder = { - readonly encoding: string; - encode(input?: string): Uint8Array; -}; -type TextEncoderConstructor = { - new (): TextEncoder; -}; - -// Node byte utils global -declare const TextDecoder: 
TextDecoderConstructor; -declare const TextEncoder: TextEncoderConstructor; - /** @internal */ export function nodejsMathRandomBytes(byteLength: number) { return nodeJsByteUtils.fromNumberArray( @@ -161,7 +139,7 @@ export const nodeJsByteUtils = { // TODO(NODE-4930): Insufficiently strict BSON UTF8 validation for (let i = 0; i < string.length; i++) { if (string.charCodeAt(i) === 0xfffd) { - if (!validateUtf8(buffer, start, end)) { + if (!validateUtf8(buffer, start, end, fatal)) { throw new BSONError('Invalid UTF-8 string in BSON document'); } break; diff --git a/src/utils/web_byte_utils.ts b/src/utils/web_byte_utils.ts index e7d39b62..3fed2714 100644 --- a/src/utils/web_byte_utils.ts +++ b/src/utils/web_byte_utils.ts @@ -1,4 +1,5 @@ -import { BSONError, BSONUTF8Error } from '../error'; +import { BSONError } from '../error'; +import { validateUtf8 } from '../validate_utf8'; import { tryReadBasicLatin } from './latin'; type TextDecoder = { @@ -179,14 +180,7 @@ export const webByteUtils = { return basicLatin; } - if (fatal) { - try { - return new TextDecoder('utf8', { fatal }).decode(uint8array.slice(start, end)); - } catch (cause) { - throw new BSONUTF8Error('Invalid UTF-8 string in BSON document', { cause }); - } - } - return new TextDecoder('utf8', { fatal }).decode(uint8array.slice(start, end)); + return validateUtf8(uint8array, start, end, fatal); }, utf8ByteLength(input: string): number { diff --git a/src/validate_utf8.ts b/src/validate_utf8.ts index ba1acb7e..1d2a8156 100644 --- a/src/validate_utf8.ts +++ b/src/validate_utf8.ts @@ -1,21 +1,26 @@ -import { NumberUtils } from "./utils/number_utils"; - -const FIRST_BIT = 0x80; -const FIRST_TWO_BITS = 0xc0; -const FIRST_THREE_BITS = 0xe0; -const FIRST_FOUR_BITS = 0xf0; -const FIRST_FIVE_BITS = 0xf8; - -const TWO_BIT_CHAR = 0xc0; -const THREE_BIT_CHAR = 0xe0; -const FOUR_BIT_CHAR = 0xf0; -const CONTINUING_CHAR = 0x80; - -// max utf8 values representable in given number of bytes -const ONE_BYTE_MAX = 0x7f; -const 
TWO_BYTE_MAX = 0x7ff; -const THREE_BYTE_MAX = 0xf7ff; - +import { BSONError } from './error'; + +type TextDecoder = { + readonly encoding: string; + readonly fatal: boolean; + readonly ignoreBOM: boolean; + decode(input?: Uint8Array): string; +}; +type TextDecoderConstructor = { + new (label: 'utf8', options: { fatal: boolean; ignoreBOM?: boolean }): TextDecoder; +}; + +type TextEncoder = { + readonly encoding: string; + encode(input?: string): Uint8Array; +}; +type TextEncoderConstructor = { + new (): TextEncoder; +}; + +// Node byte utils global +declare const TextDecoder: TextDecoderConstructor; +declare const TextEncoder: TextEncoderConstructor; /** * Determines if the passed in bytes are valid utf8 @@ -24,45 +29,17 @@ const THREE_BYTE_MAX = 0xf7ff; * @param end - The index to end validating */ export function validateUtf8( - bytes: { [index: number]: number }, + buffer: Uint8Array, start: number, - end: number -): boolean { - let continuation = 0; - - for (let i = start; i < end; i += 1) { - const byte = bytes[i]; - - if (continuation) { - if ((byte & FIRST_TWO_BITS) !== CONTINUING_CHAR) { - return false; - } - continuation -= 1; - } else if (byte & FIRST_BIT && - parseUtf8Bytes([byte, bytes[i+1]]) > ONE_BYTE_MAX) { - if ((byte & FIRST_THREE_BITS) === TWO_BIT_CHAR) { - continuation = 1; - } else if ((byte & FIRST_FOUR_BITS) === THREE_BIT_CHAR && - parseUtf8Bytes([byte, bytes[i+1], bytes[i+2]]) > TWO_BYTE_MAX) { - continuation = 2; - } else if ((byte & FIRST_FIVE_BITS) === FOUR_BIT_CHAR && - parseUtf8Bytes([byte, bytes[i+1], bytes[i+2], bytes[i+3]]) > THREE_BYTE_MAX) { - continuation = 3; - } else { - return false; - } + end: number, + fatal: boolean +): string { + if (fatal) { + try { + return new TextDecoder('utf8', { fatal }).decode(buffer.slice(start, end)); + } catch (cause) { + throw new BSONError('Invalid UTF-8 string in BSON document', { cause }); } } - - return !continuation; + return new TextDecoder('utf8', { fatal }).decode(buffer.slice(start, end)); 
} - -function parseUtf8Bytes(arr: number[]): number { - arr[0] >>= (arr.length - 1); - for (let i = 1; i < arr.length; i++) { - arr[i] >>= 2; - arr[i] <<= i*8; - arr[0] = arr[0] | arr[i] - } - return arr[0]; -} \ No newline at end of file diff --git a/test/node/byte_utils.test.ts b/test/node/byte_utils.test.ts index 0a3e75ae..a41b19d3 100644 --- a/test/node/byte_utils.test.ts +++ b/test/node/byte_utils.test.ts @@ -8,7 +8,7 @@ import { webByteUtils } from '../../src/utils/web_byte_utils'; import * as sinon from 'sinon'; import { loadCJSModuleBSON, loadReactNativeCJSModuleBSON, loadESModuleBSON } from '../load_bson'; import * as crypto from 'node:crypto'; -import { BSONError, BSONUTF8Error } from '../../src/error'; +import { BSONError } from '../../src/error'; type ByteUtilTest = { name: string; @@ -401,7 +401,6 @@ const fromUTF8Tests: ByteUtilTest<'encodeUTF8Into'>[] = [ } ]; - const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [ { name: 'should create utf8 string from buffer input', @@ -428,7 +427,7 @@ const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [ } }, { - name: 'should throw an error if fatal is set and string is invalid', + name: 'should throw an error if fatal is set and string is a sequence that decodes to an invalid code point', inputs: [Buffer.from('616263f09fa4', 'hex'), 0, 7, true], expectation({ error }) { expect(error).to.match(/Invalid UTF-8 string in BSON document/i); @@ -454,144 +453,18 @@ const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [ expectation({ error }) { expect(error).to.match(/Invalid UTF-8 string in BSON document/i); } - }, - { inputs: [Buffer.from('0xFF', 'hex'), 0, 1, true], name: 'throws when provided with invalid code' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xC0', 'hex'), 0, 1, true], name: 'throws when provided with ends early' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: 
[Buffer.from('0xE0', 'hex'), 0, 1, true], name: 'throws when provided with ends early 2' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xC000', 'hex'), 0, 2, true], name: 'throws when provided with invalid trail' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xC0C0', 'hex'), 0, 2, true], name: 'throws when provided with invalid trail 2' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xE000', 'hex'), 0, 2, true], name: 'throws when provided with invalid trail 3' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xE0C0', 'hex'), 0, 2, true], name: 'throws when provided with invalid trail 4' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xE08000', 'hex'), 0, 3, true], name: 'throws when provided with invalid trail 5' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xE080C0', 'hex'), 0, 3, true], name: 'throws when provided with invalid trail 6' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xFC8080808080', 'hex'), 0, 6, true], name: 'throws when provided with > 0x10FFFF' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xFE8080808080', 'hex'), 0, 6, true], name: 'throws when provided with obsolete lead byte' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - - // Overlong encodings - { inputs: [Buffer.from('0xC080', 'hex'), 0, 2, true], name: 'throws when provided 
with overlong U+0000 - 2 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xE08080', 'hex'), 0, 3, true], name: 'throws when provided with overlong U+0000 - 3 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xF0808080', 'hex'), 0, 4, true], name: 'throws when provided with overlong U+0000 - 4 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xF880808080', 'hex'), 0, 5, true], name: 'throws when provided with overlong U+0000 - 5 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xFC8080808080', 'hex'), 0, 6, true], name: 'throws when provided with overlong U+0000 - 6 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - - { inputs: [Buffer.from('0xC1BF', 'hex'), 0, 2, true], name: 'throws when provided with overlong U+007F - 2 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xE081BF', 'hex'), 0, 3, true], name: 'throws when provided with overlong U+007F - 3 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } }, - { inputs: [Buffer.from('0xF08081BF', 'hex'), 0, 4, true], name: 'throws when provided with overlong U+007F - 4 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xF8808081BF', 'hex'), 0, 5, true], name: 'throws when provided with overlong U+007F - 5 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xFC80808081BF', 'hex'), 0, 6, true], name: 
'throws when provided with overlong U+007F - 6 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - - { inputs: [Buffer.from('0xE09FBF', 'hex'), 0, 3, true], name: 'throws when provided with overlong U+07FF - 3 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xF0809FBF', 'hex'), 0, 4, true], name: 'throws when provided with overlong U+07FF - 4 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xF880809FBF', 'hex'), 0, 5, true], name: 'throws when provided with overlong U+07FF - 5 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xFC8080809FBF', 'hex'), 0, 6, true], name: 'throws when provided with overlong U+07FF - 6 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - - { inputs: [Buffer.from('0xF08FBFBF', 'hex'), 0, 4, true], name: 'throws when provided with overlong U+FFFF - 4 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xF8808FBFBF', 'hex'), 0, 5, true], name: 'throws when provided with overlong U+FFFF - 5 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xFC80808FBFBF', 'hex'), 0, 6, true], name: 'throws when provided with overlong U+FFFF - 6 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - - { inputs: [Buffer.from('0xF8848FBFBF', 'hex'), 0, 5, true], name: 'throws when provided with overlong U+10FFFF - 5 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: 
[Buffer.from('0xFC80848FBFBF', 'hex'), 0, 6, true], name: 'throws when provided with overlong U+10FFFF - 6 bytes' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - - // UTF-16 surrogates encoded as code points in UTF-8 - { inputs: [Buffer.from('0xEDA080', 'hex'), 0, 3, true], name: 'throws when provided with lead surrogate' , expectation({ error }) { - expect(error).to.match(/Invalid UTF-8 string in BSON document/i); - } - }, - { inputs: [Buffer.from('0xEDB080', 'hex'), 0, 3, true], name: 'throws when provided with trail surrogate' , expectation({ error }) { + { + name: 'throw an error if fatal is set and string contains a non-continuation byte before the end of the character', + inputs: [Buffer.from('c000', 'hex'), 0, 2, true], + expectation({ error }) { expect(error).to.match(/Invalid UTF-8 string in BSON document/i); } }, - { inputs: [Buffer.from('0xEDA080EDB080', 'hex'), 0, 6, true], name: 'throws when provided with surrogate pair' , expectation({ error }) { + { + name: 'throw an error if fatal is set and string ends before the end of the character', + inputs: [Buffer.from('c0', 'hex'), 0, 1, true], + expectation({ error }) { expect(error).to.match(/Invalid UTF-8 string in BSON document/i); } } @@ -658,6 +531,51 @@ const randomBytesTests: ByteUtilTest<'randomBytes'>[] = [ } ]; +// extra error cases copied from Web platform specs +const toUTF8ErrorCaseTests = [ + { input: [0xff], name: 'invalid code' }, + { input: [0xc0], name: 'ends early' }, + { input: [0xe0], name: 'ends early 2' }, + { input: [0xc0, 0x00], name: 'invalid trail' }, + { input: [0xc0, 0xc0], name: 'invalid trail 2' }, + { input: [0xe0, 0x00], name: 'invalid trail 3' }, + { input: [0xe0, 0xc0], name: 'invalid trail 4' }, + { input: [0xe0, 0x80, 0x00], name: 'invalid trail 5' }, + { input: [0xe0, 0x80, 0xc0], name: 'invalid trail 6' }, + { input: [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80], name: '> 0x10ffff' }, + { input: [0xfe, 0x80, 0x80, 0x80, 
0x80, 0x80], name: 'obsolete lead byte' }, + + // Overlong encodings + { input: [0xc0, 0x80], name: 'overlong U+0000 - 2 bytes' }, + { input: [0xe0, 0x80, 0x80], name: 'overlong U+0000 - 3 bytes' }, + { input: [0xf0, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 4 bytes' }, + { input: [0xf8, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 5 bytes' }, + { input: [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 6 bytes' }, + + { input: [0xc1, 0xbf], name: 'overlong U+007f - 2 bytes' }, + { input: [0xe0, 0x81, 0xbf], name: 'overlong U+007f - 3 bytes' }, + { input: [0xf0, 0x80, 0x81, 0xbf], name: 'overlong U+007f - 4 bytes' }, + { input: [0xf8, 0x80, 0x80, 0x81, 0xbf], name: 'overlong U+007f - 5 bytes' }, + { input: [0xfc, 0x80, 0x80, 0x80, 0x81, 0xbf], name: 'overlong U+007f - 6 bytes' }, + + { input: [0xe0, 0x9f, 0xbf], name: 'overlong U+07ff - 3 bytes' }, + { input: [0xf0, 0x80, 0x9f, 0xbf], name: 'overlong U+07ff - 4 bytes' }, + { input: [0xf8, 0x80, 0x80, 0x9f, 0xbf], name: 'overlong U+07ff - 5 bytes' }, + { input: [0xfc, 0x80, 0x80, 0x80, 0x9f, 0xbf], name: 'overlong U+07ff - 6 bytes' }, + + { input: [0xf0, 0x8f, 0xbf, 0xbf], name: 'overlong U+ffff - 4 bytes' }, + { input: [0xf8, 0x80, 0x8f, 0xbf, 0xbf], name: 'overlong U+ffff - 5 bytes' }, + { input: [0xfc, 0x80, 0x80, 0x8f, 0xbf, 0xbf], name: 'overlong U+ffff - 6 bytes' }, + + { input: [0xf8, 0x84, 0x8f, 0xbf, 0xbf], name: 'overlong U+10ffff - 5 bytes' }, + { input: [0xfc, 0x80, 0x84, 0x8f, 0xbf, 0xbf], name: 'overlong U+10ffff - 6 bytes' }, + + // UTF-16 surrogates encoded as code points in UTF-8 + { input: [0xed, 0xa0, 0x80], name: 'lead surrogate' }, + { input: [0xed, 0xb0, 0x80], name: 'trail surrogate' }, + { input: [0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80], name: 'surrogate pair' } +]; + const utils = new Map([ ['nodeJsByteUtils', nodeJsByteUtils], ['webByteUtils', webByteUtils] @@ -963,57 +881,15 @@ describe('ByteUtils', () => { test.expectation({ web: byteUtilsName === 'webByteUtils', output, 
error }); }); } + if (utility === 'toUTF8') + for (const test of toUTF8ErrorCaseTests) { + it(`throws error when fatal is set and provided ${test.name} as input`, () => { + expect(() => + byteUtils[utility](Uint8Array.from(test.input), 0, test.input.length, true) + ).to.throw(BSONError, /Invalid UTF-8 string in BSON document/i); + }); + } }); } } - - let bad = [ - { encoding: 'utf-8', input: [0xFF], name: 'invalid code' }, - { encoding: 'utf-8', input: [0xC0], name: 'ends early' }, - { encoding: 'utf-8', input: [0xE0], name: 'ends early 2' }, - { encoding: 'utf-8', input: [0xC0, 0x00], name: 'invalid trail' }, - { encoding: 'utf-8', input: [0xC0, 0xC0], name: 'invalid trail 2' }, - { encoding: 'utf-8', input: [0xE0, 0x00], name: 'invalid trail 3' }, - { encoding: 'utf-8', input: [0xE0, 0xC0], name: 'invalid trail 4' }, - { encoding: 'utf-8', input: [0xE0, 0x80, 0x00], name: 'invalid trail 5' }, - { encoding: 'utf-8', input: [0xE0, 0x80, 0xC0], name: 'invalid trail 6' }, - { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], name: '> 0x10FFFF' }, - { encoding: 'utf-8', input: [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80], name: 'obsolete lead byte' }, - - // Overlong encodings - { encoding: 'utf-8', input: [0xC0, 0x80], name: 'overlong U+0000 - 2 bytes' }, - { encoding: 'utf-8', input: [0xE0, 0x80, 0x80], name: 'overlong U+0000 - 3 bytes' }, - { encoding: 'utf-8', input: [0xF0, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 4 bytes' }, - { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 5 bytes' }, - { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 6 bytes' }, - - { encoding: 'utf-8', input: [0xC1, 0xBF], name: 'overlong U+007F - 2 bytes' }, - { encoding: 'utf-8', input: [0xE0, 0x81, 0xBF], name: 'overlong U+007F - 3 bytes' }, - { encoding: 'utf-8', input: [0xF0, 0x80, 0x81, 0xBF], name: 'overlong U+007F - 4 bytes' }, - { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x81, 0xBF], name: 
'overlong U+007F - 5 bytes' }, - { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF], name: 'overlong U+007F - 6 bytes' }, - - { encoding: 'utf-8', input: [0xE0, 0x9F, 0xBF], name: 'overlong U+07FF - 3 bytes' }, - { encoding: 'utf-8', input: [0xF0, 0x80, 0x9F, 0xBF], name: 'overlong U+07FF - 4 bytes' }, - { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x9F, 0xBF], name: 'overlong U+07FF - 5 bytes' }, - { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF], name: 'overlong U+07FF - 6 bytes' }, - - { encoding: 'utf-8', input: [0xF0, 0x8F, 0xBF, 0xBF], name: 'overlong U+FFFF - 4 bytes' }, - { encoding: 'utf-8', input: [0xF8, 0x80, 0x8F, 0xBF, 0xBF], name: 'overlong U+FFFF - 5 bytes' }, - { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF], name: 'overlong U+FFFF - 6 bytes' }, - - { encoding: 'utf-8', input: [0xF8, 0x84, 0x8F, 0xBF, 0xBF], name: 'overlong U+10FFFF - 5 bytes' }, - { encoding: 'utf-8', input: [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF], name: 'overlong U+10FFFF - 6 bytes' }, - - // UTF-16 surrogates encoded as code points in UTF-8 - { encoding: 'utf-8', input: [0xED, 0xA0, 0x80], name: 'lead surrogate' }, - { encoding: 'utf-8', input: [0xED, 0xB0, 0x80], name: 'trail surrogate' }, - { encoding: 'utf-8', input: [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80], name: 'surrogate pair' }, - ]; - - for (const test of bad) { - it.only(`${test.name}`, () => { - expect(() => nodeJsByteUtils.toUTF8(Uint8Array.from(test.input), 0, test.input.length, true)).to.throw(BSONError); - }); - } });