From 3768409d59d595b856dc042729906dd3bd6cb753 Mon Sep 17 00:00:00 2001
From: Aditi Khare <aditi.khare@mongodb.com>
Date: Tue, 23 Apr 2024 17:58:15 -0400
Subject: [PATCH] fix(NODE-6123): utf8 validation is not strict enough

---
 .../require_vendor.mjs                        |   2 +-
 src/error.ts                                  |  10 -
 src/parser/deserializer.ts                    |  14 +-
 src/test.ts                                   |   9 -
 src/utils/node_byte_utils.ts                  |  26 +-
 src/utils/web_byte_utils.ts                   |  12 +-
 src/validate_utf8.ts                          |  89 +++----
 test/node/byte_utils.test.ts                  | 250 +++++-------------
 8 files changed, 108 insertions(+), 304 deletions(-)
 delete mode 100644 src/test.ts

diff --git a/etc/rollup/rollup-plugin-require-vendor/require_vendor.mjs b/etc/rollup/rollup-plugin-require-vendor/require_vendor.mjs
index bdfe9c11..b59fd572 100644
--- a/etc/rollup/rollup-plugin-require-vendor/require_vendor.mjs
+++ b/etc/rollup/rollup-plugin-require-vendor/require_vendor.mjs
@@ -15,7 +15,7 @@ export class RequireVendor {
    */
   transform(code, id) {
     // TODO(NODE-4930)
-    if (!id.includes('web_byte_utils')) {
+    if (!id.includes('validate_utf8')) {
       return;
     }
 
diff --git a/src/error.ts b/src/error.ts
index 7dd101c5..ef5184a4 100644
--- a/src/error.ts
+++ b/src/error.ts
@@ -103,13 +103,3 @@ export class BSONOffsetError extends BSONError {
     this.offset = offset;
   }
 }
-
-export class BSONUTF8Error extends BSONError {
-  public get name(): 'BSONUTF8Error' {
-    return 'BSONUTF8Error';
-  }
-
-  constructor(message: string, options?: { cause?: unknown }) {
-    super(message, options);
-  }
-}
diff --git a/src/parser/deserializer.ts b/src/parser/deserializer.ts
index ac278190..5f030291 100644
--- a/src/parser/deserializer.ts
+++ b/src/parser/deserializer.ts
@@ -16,7 +16,6 @@ import { BSONSymbol } from '../symbol';
 import { Timestamp } from '../timestamp';
 import { ByteUtils } from '../utils/byte_utils';
 import { NumberUtils } from '../utils/number_utils';
-import { validateUtf8 } from '../validate_utf8';
 
 /** @public */
 export interface DeserializeOptions {
@@ -603,13 +602,12 @@ function deserializeObject(
         buffer[index + stringSize - 1] !== 0
       )
         throw new BSONError('bad string length in bson');
-      // Namespace
-      if (validation != null && validation.utf8) {
-        if (!validateUtf8(buffer, index, index + stringSize - 1)) {
-          throw new BSONError('Invalid UTF-8 string in BSON document');
-        }
-      }
-      const namespace = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, false);
+      const namespace = ByteUtils.toUTF8(
+        buffer,
+        index,
+        index + stringSize - 1,
+        validation != null && (validation.utf8 as boolean)
+      );
       // Update parse index position
       index = index + stringSize;
 
diff --git a/src/test.ts b/src/test.ts
deleted file mode 100644
index 2ef044cf..00000000
--- a/src/test.ts
+++ /dev/null
@@ -1,9 +0,0 @@
-function parseUtf8Bits(arr: number[]): number {
-  arr[0] >>= (arr.length - 1);
-  for (let i = 1; i < arr.length; i++) {
-    arr[i] >>= 2;
-    arr[i] <<= i*8;
-    arr[0] = arr[0] | arr[i]
-  }
-  return arr[0];
-}
\ No newline at end of file
diff --git a/src/utils/node_byte_utils.ts b/src/utils/node_byte_utils.ts
index 8603b45e..d9487046 100644
--- a/src/utils/node_byte_utils.ts
+++ b/src/utils/node_byte_utils.ts
@@ -1,4 +1,4 @@
-import { BSONError, BSONUTF8Error } from '../error';
+import { BSONError } from '../error';
 import { validateUtf8 } from '../validate_utf8';
 import { tryReadBasicLatin, tryWriteBasicLatin } from './latin';
 
@@ -27,28 +27,6 @@ type NodeJsBufferConstructor = Omit<Uint8ArrayConstructor, 'from'> & {
 declare const Buffer: NodeJsBufferConstructor;
 declare const require: (mod: 'crypto') => { randomBytes: (byteLength: number) => Uint8Array };
 
-type TextDecoder = {
-  readonly encoding: string;
-  readonly fatal: boolean;
-  readonly ignoreBOM: boolean;
-  decode(input?: Uint8Array): string;
-};
-type TextDecoderConstructor = {
-  new (label: 'utf8', options: { fatal: boolean; ignoreBOM?: boolean }): TextDecoder;
-};
-
-type TextEncoder = {
-  readonly encoding: string;
-  encode(input?: string): Uint8Array;
-};
-type TextEncoderConstructor = {
-  new (): TextEncoder;
-};
-
-// Node byte utils global
-declare const TextDecoder: TextDecoderConstructor;
-declare const TextEncoder: TextEncoderConstructor;
-
 /** @internal */
 export function nodejsMathRandomBytes(byteLength: number) {
   return nodeJsByteUtils.fromNumberArray(
@@ -161,7 +139,7 @@ export const nodeJsByteUtils = {
       // TODO(NODE-4930): Insufficiently strict BSON UTF8 validation
       for (let i = 0; i < string.length; i++) {
         if (string.charCodeAt(i) === 0xfffd) {
-          if (!validateUtf8(buffer, start, end)) {
+          if (!validateUtf8(buffer, start, end, fatal)) {
             throw new BSONError('Invalid UTF-8 string in BSON document');
           }
           break;
diff --git a/src/utils/web_byte_utils.ts b/src/utils/web_byte_utils.ts
index e7d39b62..3fed2714 100644
--- a/src/utils/web_byte_utils.ts
+++ b/src/utils/web_byte_utils.ts
@@ -1,4 +1,5 @@
-import { BSONError, BSONUTF8Error } from '../error';
+import { BSONError } from '../error';
+import { validateUtf8 } from '../validate_utf8';
 import { tryReadBasicLatin } from './latin';
 
 type TextDecoder = {
@@ -179,14 +180,7 @@ export const webByteUtils = {
       return basicLatin;
     }
 
-    if (fatal) {
-      try {
-        return new TextDecoder('utf8', { fatal }).decode(uint8array.slice(start, end));
-      } catch (cause) {
-        throw new BSONUTF8Error('Invalid UTF-8 string in BSON document', { cause });
-      }
-    }
-    return new TextDecoder('utf8', { fatal }).decode(uint8array.slice(start, end));
+    return validateUtf8(uint8array, start, end, fatal);
   },
 
   utf8ByteLength(input: string): number {
diff --git a/src/validate_utf8.ts b/src/validate_utf8.ts
index ba1acb7e..1d2a8156 100644
--- a/src/validate_utf8.ts
+++ b/src/validate_utf8.ts
@@ -1,21 +1,26 @@
-import { NumberUtils } from "./utils/number_utils";
-
-const FIRST_BIT = 0x80;
-const FIRST_TWO_BITS = 0xc0;
-const FIRST_THREE_BITS = 0xe0;
-const FIRST_FOUR_BITS = 0xf0;
-const FIRST_FIVE_BITS = 0xf8;
-
-const TWO_BIT_CHAR = 0xc0;
-const THREE_BIT_CHAR = 0xe0;
-const FOUR_BIT_CHAR = 0xf0;
-const CONTINUING_CHAR = 0x80;
-
-// max utf8 values representable in given number of bytes
-const ONE_BYTE_MAX = 0x7f;
-const TWO_BYTE_MAX = 0x7ff;
-const THREE_BYTE_MAX = 0xf7ff;
-
+import { BSONError } from './error';
+
+type TextDecoder = {
+  readonly encoding: string;
+  readonly fatal: boolean;
+  readonly ignoreBOM: boolean;
+  decode(input?: Uint8Array): string;
+};
+type TextDecoderConstructor = {
+  new (label: 'utf8', options: { fatal: boolean; ignoreBOM?: boolean }): TextDecoder;
+};
+
+type TextEncoder = {
+  readonly encoding: string;
+  encode(input?: string): Uint8Array;
+};
+type TextEncoderConstructor = {
+  new (): TextEncoder;
+};
+
+// Ambient declarations for the TextDecoder/TextEncoder globals supplied by the runtime
+declare const TextDecoder: TextDecoderConstructor;
+declare const TextEncoder: TextEncoderConstructor;
 
 /**
  * Determines if the passed in bytes are valid utf8
@@ -24,45 +29,17 @@ const THREE_BYTE_MAX = 0xf7ff;
  * @param end - The index to end validating
  */
 export function validateUtf8(
-  bytes: { [index: number]: number },
+  buffer: Uint8Array,
   start: number,
-  end: number
-): boolean {
-  let continuation = 0;
-
-  for (let i = start; i < end; i += 1) {
-    const byte = bytes[i];
-
-    if (continuation) {
-      if ((byte & FIRST_TWO_BITS) !== CONTINUING_CHAR) {
-        return false;
-      }
-      continuation -= 1;
-    } else if (byte & FIRST_BIT &&
-      parseUtf8Bytes([byte, bytes[i+1]]) > ONE_BYTE_MAX) {
-      if ((byte & FIRST_THREE_BITS) === TWO_BIT_CHAR) {
-        continuation = 1;
-      } else if ((byte & FIRST_FOUR_BITS) === THREE_BIT_CHAR &&
-      parseUtf8Bytes([byte, bytes[i+1], bytes[i+2]]) > TWO_BYTE_MAX) {
-        continuation = 2;
-      } else if ((byte & FIRST_FIVE_BITS) === FOUR_BIT_CHAR &&
-      parseUtf8Bytes([byte, bytes[i+1], bytes[i+2], bytes[i+3]]) > THREE_BYTE_MAX) {
-        continuation = 3;
-      } else {
-        return false;
-      }
+  end: number,
+  fatal: boolean
+): string {
+  if (fatal) {
+    try {
+      return new TextDecoder('utf8', { fatal }).decode(buffer.slice(start, end));
+    } catch (cause) {
+      throw new BSONError('Invalid UTF-8 string in BSON document', { cause });
     }
   }
-
-  return !continuation;
+  return new TextDecoder('utf8', { fatal }).decode(buffer.slice(start, end));
 }
-
-function parseUtf8Bytes(arr: number[]): number {
-  arr[0] >>= (arr.length - 1);
-  for (let i = 1; i < arr.length; i++) {
-    arr[i] >>= 2;
-    arr[i] <<= i*8;
-    arr[0] = arr[0] | arr[i]
-  }
-  return arr[0];
-}
\ No newline at end of file
diff --git a/test/node/byte_utils.test.ts b/test/node/byte_utils.test.ts
index 0a3e75ae..a41b19d3 100644
--- a/test/node/byte_utils.test.ts
+++ b/test/node/byte_utils.test.ts
@@ -8,7 +8,7 @@ import { webByteUtils } from '../../src/utils/web_byte_utils';
 import * as sinon from 'sinon';
 import { loadCJSModuleBSON, loadReactNativeCJSModuleBSON, loadESModuleBSON } from '../load_bson';
 import * as crypto from 'node:crypto';
-import { BSONError, BSONUTF8Error } from '../../src/error';
+import { BSONError } from '../../src/error';
 
 type ByteUtilTest<K extends keyof ByteUtils> = {
   name: string;
@@ -401,7 +401,6 @@ const fromUTF8Tests: ByteUtilTest<'encodeUTF8Into'>[] = [
   }
 ];
 
-
 const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
   {
     name: 'should create utf8 string from buffer input',
@@ -428,7 +427,7 @@ const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
     }
   },
   {
-    name: 'should throw an error if fatal is set and string is invalid',
+    name: 'should throw an error if fatal is set and string is a sequence that decodes to an invalid code point',
     inputs: [Buffer.from('616263f09fa4', 'hex'), 0, 7, true],
     expectation({ error }) {
       expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
@@ -454,144 +453,18 @@ const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
     expectation({ error }) {
       expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
     }
-  }, 
-  { inputs: [Buffer.from('0xFF', 'hex'), 0, 1, true], name: 'throws when provided with invalid code' , expectation({ error }) {
-    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xC0', 'hex'), 0, 1, true], name: 'throws when provided with ends early' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xE0', 'hex'), 0, 1, true], name: 'throws when provided with ends early 2' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xC000', 'hex'), 0, 2, true], name: 'throws when provided with invalid trail' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xC0C0', 'hex'), 0, 2, true], name: 'throws when provided with invalid trail 2' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xE000', 'hex'), 0, 2, true], name: 'throws when provided with invalid trail 3' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xE0C0', 'hex'), 0, 2, true], name: 'throws when provided with invalid trail 4' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xE08000', 'hex'), 0, 3, true], name: 'throws when provided with invalid trail 5' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xE080C0', 'hex'), 0, 3, true], name: 'throws when provided with invalid trail 6' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xFC8080808080', 'hex'), 0, 6, true], name: 'throws when provided with > 0x10FFFF' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xFE8080808080', 'hex'), 0, 6, true], name: 'throws when provided with obsolete lead byte' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-
-  // Overlong encodings
-  { inputs: [Buffer.from('0xC080', 'hex'), 0, 2, true], name: 'throws when provided with overlong U+0000 - 2 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xE08080', 'hex'), 0, 3, true], name: 'throws when provided with overlong U+0000 - 3 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xF0808080', 'hex'), 0, 4, true], name: 'throws when provided with overlong U+0000 - 4 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xF880808080', 'hex'), 0, 5, true], name: 'throws when provided with overlong U+0000 - 5 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xFC8080808080', 'hex'), 0, 6, true], name: 'throws when provided with overlong U+0000 - 6 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-
-  { inputs: [Buffer.from('0xC1BF', 'hex'), 0, 2, true], name: 'throws when provided with overlong U+007F - 2 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xE081BF', 'hex'), 0, 3, true], name: 'throws when provided with overlong U+007F - 3 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
   },
-  { inputs: [Buffer.from('0xF08081BF', 'hex'), 0, 4, true], name: 'throws when provided with overlong U+007F - 4 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xF8808081BF', 'hex'), 0, 5, true], name: 'throws when provided with overlong U+007F - 5 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xFC80808081BF', 'hex'), 0, 6, true], name: 'throws when provided with overlong U+007F - 6 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-
-  { inputs: [Buffer.from('0xE09FBF', 'hex'), 0, 3, true], name: 'throws when provided with overlong U+07FF - 3 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xF0809FBF', 'hex'), 0, 4, true], name: 'throws when provided with overlong U+07FF - 4 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xF880809FBF', 'hex'), 0, 5, true], name: 'throws when provided with overlong U+07FF - 5 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-    { inputs: [Buffer.from('0xFC8080809FBF', 'hex'), 0, 6, true], name: 'throws when provided with overlong U+07FF - 6 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-
-  { inputs: [Buffer.from('0xF08FBFBF', 'hex'), 0, 4, true], name: 'throws when provided with overlong U+FFFF - 4 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xF8808FBFBF', 'hex'), 0, 5, true], name: 'throws when provided with overlong U+FFFF - 5 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xFC80808FBFBF', 'hex'), 0, 6, true], name: 'throws when provided with overlong U+FFFF - 6 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-
-  { inputs: [Buffer.from('0xF8848FBFBF', 'hex'), 0, 5, true], name: 'throws when provided with overlong U+10FFFF - 5 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xFC80848FBFBF', 'hex'), 0, 6, true], name: 'throws when provided with overlong U+10FFFF - 6 bytes' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-
-  // UTF-16 surrogates encoded as code points in UTF-8
-  { inputs: [Buffer.from('0xEDA080', 'hex'), 0, 3, true], name: 'throws when provided with lead surrogate' , expectation({ error }) {
-      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
-    }
-  },
-  { inputs: [Buffer.from('0xEDB080', 'hex'), 0, 3, true], name: 'throws when provided with trail surrogate' , expectation({ error }) {
+  {
+    name: 'throw an error if fatal is set and string contains a non-continuation byte before the end of the character',
+    inputs: [Buffer.from('c000', 'hex'), 0, 2, true],
+    expectation({ error }) {
       expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
     }
   },
-  { inputs: [Buffer.from('0xEDA080EDB080', 'hex'), 0, 6, true], name: 'throws when provided with surrogate pair' , expectation({ error }) {
+  {
+    name: 'throw an error if fatal is set and string ends before the end of the character',
+    inputs: [Buffer.from('c0', 'hex'), 0, 1, true],
+    expectation({ error }) {
       expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
     }
   }
@@ -658,6 +531,51 @@ const randomBytesTests: ByteUtilTest<'randomBytes'>[] = [
   }
 ];
 
+// extra error cases copied from Web platform specs
+const toUTF8ErrorCaseTests = [
+  { input: [0xff], name: 'invalid code' },
+  { input: [0xc0], name: 'ends early' },
+  { input: [0xe0], name: 'ends early 2' },
+  { input: [0xc0, 0x00], name: 'invalid trail' },
+  { input: [0xc0, 0xc0], name: 'invalid trail 2' },
+  { input: [0xe0, 0x00], name: 'invalid trail 3' },
+  { input: [0xe0, 0xc0], name: 'invalid trail 4' },
+  { input: [0xe0, 0x80, 0x00], name: 'invalid trail 5' },
+  { input: [0xe0, 0x80, 0xc0], name: 'invalid trail 6' },
+  { input: [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80], name: '> 0x10ffff' },
+  { input: [0xfe, 0x80, 0x80, 0x80, 0x80, 0x80], name: 'obsolete lead byte' },
+
+  // Overlong encodings
+  { input: [0xc0, 0x80], name: 'overlong U+0000 - 2 bytes' },
+  { input: [0xe0, 0x80, 0x80], name: 'overlong U+0000 - 3 bytes' },
+  { input: [0xf0, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 4 bytes' },
+  { input: [0xf8, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 5 bytes' },
+  { input: [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 6 bytes' },
+
+  { input: [0xc1, 0xbf], name: 'overlong U+007f - 2 bytes' },
+  { input: [0xe0, 0x81, 0xbf], name: 'overlong U+007f - 3 bytes' },
+  { input: [0xf0, 0x80, 0x81, 0xbf], name: 'overlong U+007f - 4 bytes' },
+  { input: [0xf8, 0x80, 0x80, 0x81, 0xbf], name: 'overlong U+007f - 5 bytes' },
+  { input: [0xfc, 0x80, 0x80, 0x80, 0x81, 0xbf], name: 'overlong U+007f - 6 bytes' },
+
+  { input: [0xe0, 0x9f, 0xbf], name: 'overlong U+07ff - 3 bytes' },
+  { input: [0xf0, 0x80, 0x9f, 0xbf], name: 'overlong U+07ff - 4 bytes' },
+  { input: [0xf8, 0x80, 0x80, 0x9f, 0xbf], name: 'overlong U+07ff - 5 bytes' },
+  { input: [0xfc, 0x80, 0x80, 0x80, 0x9f, 0xbf], name: 'overlong U+07ff - 6 bytes' },
+
+  { input: [0xf0, 0x8f, 0xbf, 0xbf], name: 'overlong U+ffff - 4 bytes' },
+  { input: [0xf8, 0x80, 0x8f, 0xbf, 0xbf], name: 'overlong U+ffff - 5 bytes' },
+  { input: [0xfc, 0x80, 0x80, 0x8f, 0xbf, 0xbf], name: 'overlong U+ffff - 6 bytes' },
+
+  { input: [0xf8, 0x84, 0x8f, 0xbf, 0xbf], name: 'overlong U+10ffff - 5 bytes' },
+  { input: [0xfc, 0x80, 0x84, 0x8f, 0xbf, 0xbf], name: 'overlong U+10ffff - 6 bytes' },
+
+  // UTF-16 surrogates encoded as code points in UTF-8
+  { input: [0xed, 0xa0, 0x80], name: 'lead surrogate' },
+  { input: [0xed, 0xb0, 0x80], name: 'trail surrogate' },
+  { input: [0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80], name: 'surrogate pair' }
+];
+
 const utils = new Map([
   ['nodeJsByteUtils', nodeJsByteUtils],
   ['webByteUtils', webByteUtils]
@@ -963,57 +881,15 @@ describe('ByteUtils', () => {
             test.expectation({ web: byteUtilsName === 'webByteUtils', output, error });
           });
         }
+        if (utility === 'toUTF8')
+          for (const test of toUTF8ErrorCaseTests) {
+            it(`throws error when fatal is set and provided ${test.name} as input`, () => {
+              expect(() =>
+                byteUtils[utility](Uint8Array.from(test.input), 0, test.input.length, true)
+              ).to.throw(BSONError, /Invalid UTF-8 string in BSON document/i);
+            });
+          }
       });
     }
   }
-
-  let bad = [
-    { encoding: 'utf-8', input: [0xFF], name: 'invalid code' },
-    { encoding: 'utf-8', input: [0xC0], name: 'ends early' },
-    { encoding: 'utf-8', input: [0xE0], name: 'ends early 2' },
-    { encoding: 'utf-8', input: [0xC0, 0x00], name: 'invalid trail' },
-    { encoding: 'utf-8', input: [0xC0, 0xC0], name: 'invalid trail 2' },
-    { encoding: 'utf-8', input: [0xE0, 0x00], name: 'invalid trail 3' },
-    { encoding: 'utf-8', input: [0xE0, 0xC0], name: 'invalid trail 4' },
-    { encoding: 'utf-8', input: [0xE0, 0x80, 0x00], name: 'invalid trail 5' },
-    { encoding: 'utf-8', input: [0xE0, 0x80, 0xC0], name: 'invalid trail 6' },
-    { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], name: '> 0x10FFFF' },
-    { encoding: 'utf-8', input: [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80], name: 'obsolete lead byte' },
-
-    // Overlong encodings
-    { encoding: 'utf-8', input: [0xC0, 0x80], name: 'overlong U+0000 - 2 bytes' },
-    { encoding: 'utf-8', input: [0xE0, 0x80, 0x80], name: 'overlong U+0000 - 3 bytes' },
-    { encoding: 'utf-8', input: [0xF0, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 4 bytes' },
-    { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 5 bytes' },
-    { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 6 bytes' },
-
-    { encoding: 'utf-8', input: [0xC1, 0xBF], name: 'overlong U+007F - 2 bytes' },
-    { encoding: 'utf-8', input: [0xE0, 0x81, 0xBF], name: 'overlong U+007F - 3 bytes' },
-    { encoding: 'utf-8', input: [0xF0, 0x80, 0x81, 0xBF], name: 'overlong U+007F - 4 bytes' },
-    { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x81, 0xBF], name: 'overlong U+007F - 5 bytes' },
-    { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF], name: 'overlong U+007F - 6 bytes' },
-
-    { encoding: 'utf-8', input: [0xE0, 0x9F, 0xBF], name: 'overlong U+07FF - 3 bytes' },
-    { encoding: 'utf-8', input: [0xF0, 0x80, 0x9F, 0xBF], name: 'overlong U+07FF - 4 bytes' },
-    { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x9F, 0xBF], name: 'overlong U+07FF - 5 bytes' },
-    { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF], name: 'overlong U+07FF - 6 bytes' },
-
-    { encoding: 'utf-8', input: [0xF0, 0x8F, 0xBF, 0xBF], name: 'overlong U+FFFF - 4 bytes' },
-    { encoding: 'utf-8', input: [0xF8, 0x80, 0x8F, 0xBF, 0xBF], name: 'overlong U+FFFF - 5 bytes' },
-    { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF], name: 'overlong U+FFFF - 6 bytes' },
-
-    { encoding: 'utf-8', input: [0xF8, 0x84, 0x8F, 0xBF, 0xBF], name: 'overlong U+10FFFF - 5 bytes' },
-    { encoding: 'utf-8', input: [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF], name: 'overlong U+10FFFF - 6 bytes' },
-
-    // UTF-16 surrogates encoded as code points in UTF-8
-    { encoding: 'utf-8', input: [0xED, 0xA0, 0x80], name: 'lead surrogate' },
-    { encoding: 'utf-8', input: [0xED, 0xB0, 0x80], name: 'trail surrogate' },
-    { encoding: 'utf-8', input: [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80], name: 'surrogate pair' },
-  ];
-  
-  for (const test of bad) {
-    it.only(`${test.name}`, () => {
-      expect(() => nodeJsByteUtils.toUTF8(Uint8Array.from(test.input), 0, test.input.length, true)).to.throw(BSONError);
-    });
-  }
 });