From d7bdcec04349af697ead5655d14c4494f307b6a1 Mon Sep 17 00:00:00 2001
From: Neal Beeken <neal.beeken@mongodb.com>
Date: Mon, 18 Nov 2024 18:05:52 -0500
Subject: [PATCH] feat(NODE-6537): add support for binary vectors (#730)

---
 .evergreen/run-big-endian-test.sh         |   5 -
 etc/Dockerfile                            |  10 +
 etc/run-big-endian-test.sh                |  22 ++
 src/binary.ts                             | 218 ++++++++++++++++++
 src/parser/serializer.ts                  |   6 +-
 src/utils/byte_utils.ts                   |   2 +
 src/utils/node_byte_utils.ts              |   7 +-
 src/utils/number_utils.ts                 |   4 +
 src/utils/web_byte_utils.ts               |  21 +-
 test/node/binary.test.ts                  | 268 +++++++++++++++++++++-
 test/node/bson_binary_vector.spec.test.ts |  72 +-----
 test/node/byte_utils.test.ts              |  27 ++-
 12 files changed, 585 insertions(+), 77 deletions(-)
 delete mode 100644 .evergreen/run-big-endian-test.sh
 create mode 100644 etc/Dockerfile
 create mode 100644 etc/run-big-endian-test.sh

diff --git a/.evergreen/run-big-endian-test.sh b/.evergreen/run-big-endian-test.sh
deleted file mode 100644
index a1cc8240..00000000
--- a/.evergreen/run-big-endian-test.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/usr/bin/env bash
-
-source $DRIVERS_TOOLS/.evergreen/init-node-and-npm-env.sh
-
-npx mocha test/s390x/big_endian.test.ts
diff --git a/etc/Dockerfile b/etc/Dockerfile
new file mode 100644
index 00000000..3ceb2a50
--- /dev/null
+++ b/etc/Dockerfile
@@ -0,0 +1,10 @@
+FROM node:22 AS build
+
+WORKDIR /bson
+COPY . .
+
+RUN rm -rf node_modules && npm install && npm test
+
+FROM scratch
+
+COPY --from=build /bson/docs/ /
diff --git a/etc/run-big-endian-test.sh b/etc/run-big-endian-test.sh
new file mode 100644
index 00000000..775cc6ee
--- /dev/null
+++ b/etc/run-big-endian-test.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+
+# At the time of writing, this script is not used in CI,
+# but it can be used to iterate locally on big endian bugs.
+# buildx requires an output, so we export docs, which should be a no-op.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+set -o xtrace
+
+# If you get an error you may have an outdated buildkit version
+# Try running this:
+# docker buildx rm builder && docker buildx create --name builder --bootstrap --use
+
+docker buildx build \
+    --progress=plain \
+    --platform linux/s390x \
+    --build-arg="NODE_ARCH=s390x" \
+    -f ./etc/Dockerfile \
+    --output type=local,dest=./docs,platform-split=false \
+    .
diff --git a/src/binary.ts b/src/binary.ts
index d3b496c3..1fe09805 100644
--- a/src/binary.ts
+++ b/src/binary.ts
@@ -4,6 +4,7 @@ import { BSONError } from './error';
 import { BSON_BINARY_SUBTYPE_UUID_NEW } from './constants';
 import { ByteUtils } from './utils/byte_utils';
 import { BSONValue } from './bson_value';
+import { NumberUtils } from './utils/number_utils';
 
 /** @public */
 export type BinarySequence = Uint8Array | number[];
@@ -58,9 +59,18 @@ export class Binary extends BSONValue {
   static readonly SUBTYPE_COLUMN = 7;
   /** Sensitive BSON type */
   static readonly SUBTYPE_SENSITIVE = 8;
+  /** Vector BSON type */
+  static readonly SUBTYPE_VECTOR = 9;
   /** User BSON type */
   static readonly SUBTYPE_USER_DEFINED = 128;
 
+  /** Known values of the datatype byte (`buffer[0]`) of a Binary Vector (subtype: 9) */
+  static readonly VECTOR_TYPE = Object.freeze({
+    Int8: 0x03,
+    Float32: 0x27,
+    PackedBit: 0x10
+  } as const);
+
   /**
    * The bytes of the Binary value.
    *
@@ -238,6 +248,11 @@ export class Binary extends BSONValue {
   /** @internal */
   toExtendedJSON(options?: EJSONOptions): BinaryExtendedLegacy | BinaryExtended {
     options = options || {};
+
+    if (this.sub_type === Binary.SUBTYPE_VECTOR) {
+      validateBinaryVector(this);
+    }
+
     const base64String = ByteUtils.toBase64(this.buffer);
 
     const subType = Number(this.sub_type).toString(16);
@@ -310,6 +325,209 @@ export class Binary extends BSONValue {
     const subTypeArg = inspect(this.sub_type, options);
     return `Binary.createFromBase64(${base64Arg}, ${subTypeArg})`;
   }
+
+  /**
+   * If this Binary represents an Int8 Vector (`binary.buffer[0] === Binary.VECTOR_TYPE.Int8`),
+   * returns a copy of the bytes after the two-byte header (datatype, padding) in a new Int8Array.
+   *
+   * If the Binary is not a Vector, or the datatype is not Int8, a BSONError is thrown.
+   */
+  public toInt8Array(): Int8Array {
+    if (this.sub_type !== Binary.SUBTYPE_VECTOR) {
+      throw new BSONError('Binary sub_type is not Vector');
+    }
+
+    if (this.buffer[0] !== Binary.VECTOR_TYPE.Int8) {
+      throw new BSONError('Binary datatype field is not Int8');
+    }
+
+    return new Int8Array(
+      this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position)
+    );
+  }
+
+  /**
+   * If this Binary represents a Float32 Vector (`binary.buffer[0] === Binary.VECTOR_TYPE.Float32`),
+   * returns a copy of the bytes in a new Float32Array (byte-swapped on big endian hosts).
+   *
+   * If the Binary is not a Vector, or the datatype is not Float32, a BSONError is thrown.
+   */
+  public toFloat32Array(): Float32Array {
+    if (this.sub_type !== Binary.SUBTYPE_VECTOR) {
+      throw new BSONError('Binary sub_type is not Vector');
+    }
+
+    if (this.buffer[0] !== Binary.VECTOR_TYPE.Float32) {
+      throw new BSONError('Binary datatype field is not Float32');
+    }
+
+    const floatBytes = new Uint8Array(
+      this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position)
+    );
+    // The stored floats are little endian; swap the copy (not this.buffer) on big endian hosts
+    if (NumberUtils.isBigEndian) ByteUtils.swap32(floatBytes);
+
+    return new Float32Array(floatBytes.buffer);
+  }
+
+  /**
+   * If this Binary represents a packed bit Vector (`binary.buffer[0] === Binary.VECTOR_TYPE.PackedBit`),
+   * returns a copy of the packed bytes that follow the two-byte header (datatype, padding).
+   *
+   * Use `toBits` to get the unpacked bits.
+   *
+   * If the Binary is not a Vector, or the datatype is not PackedBit, a BSONError is thrown.
+   */
+  public toPackedBits(): Uint8Array {
+    if (this.sub_type !== Binary.SUBTYPE_VECTOR) {
+      throw new BSONError('Binary sub_type is not Vector');
+    }
+
+    if (this.buffer[0] !== Binary.VECTOR_TYPE.PackedBit) {
+      throw new BSONError('Binary datatype field is not packed bit');
+    }
+
+    return new Uint8Array(
+      this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position)
+    );
+  }
+
+  /**
+   * If this Binary represents a packed bit Vector (`binary.buffer[0] === Binary.VECTOR_TYPE.PackedBit`),
+   * returns the bits unpacked into a new Int8Array of 0s and 1s, most significant bit first.
+   * The trailing padding bits of the last byte (count in `binary.buffer[1]`) are not included.
+   * Use `toPackedBits` to get the bits still in packed form.
+   * Throws a BSONError if the sub_type, datatype, or padding is invalid for a packed bit Vector.
+   */
+  public toBits(): Int8Array {
+    if (this.sub_type !== Binary.SUBTYPE_VECTOR) {
+      throw new BSONError('Binary sub_type is not Vector');
+    }
+
+    if (this.buffer[0] !== Binary.VECTOR_TYPE.PackedBit) {
+      throw new BSONError('Binary datatype field is not packed bit');
+    }
+
+    // Reject bad padding first: otherwise bitCount can go negative or drop whole bytes of bits
+    validateBinaryVector(this);
+
+    const byteCount = this.length() - 2;
+    const bitCount = byteCount * 8 - this.buffer[1];
+    const bits = new Int8Array(bitCount);
+    for (let bitOffset = 0; bitOffset < bits.length; bitOffset++) {
+      const byteOffset = (bitOffset / 8) | 0;
+      const byte = this.buffer[byteOffset + 2];
+      const shift = 7 - (bitOffset % 8);
+      const bit = (byte >> shift) & 1;
+      bits[bitOffset] = bit;
+    }
+
+    return bits;
+  }
+
+  /**
+   * Constructs a Binary representing an Int8 Vector.
+   * @param array - The array whose bytes are copied into the new Binary after a two-byte header
+   */
+  public static fromInt8Array(array: Int8Array): Binary {
+    const buffer = ByteUtils.allocate(array.byteLength + 2);
+    buffer[0] = Binary.VECTOR_TYPE.Int8;
+    buffer[1] = 0;
+    const intBytes = new Uint8Array(array.buffer, array.byteOffset, array.byteLength);
+    buffer.set(intBytes, 2);
+    return new this(buffer, this.SUBTYPE_VECTOR);
+  }
+
+  /** Constructs a Binary representing a Float32 Vector, storing the floats in little endian. */
+  public static fromFloat32Array(array: Float32Array): Binary {
+    const binaryBytes = ByteUtils.allocate(array.byteLength + 2);
+    binaryBytes[0] = Binary.VECTOR_TYPE.Float32;
+    binaryBytes[1] = 0;
+
+    const floatBytes = new Uint8Array(array.buffer, array.byteOffset, array.byteLength);
+    binaryBytes.set(floatBytes, 2);
+    // Swap only the float payload; subarray(2) stays correct even if byteOffset is not 0
+    if (NumberUtils.isBigEndian) ByteUtils.swap32(binaryBytes.subarray(2));
+
+    return new this(binaryBytes, this.SUBTYPE_VECTOR);
+  }
+
+  /**
+   * Constructs a Binary representing a packed bit Vector from bytes that are already packed.
+   * `padding` is written to the second byte (`buffer[1]`); it is not validated here.
+   * Use `fromBits` to pack an array of 1s and 0s.
+   */
+  public static fromPackedBits(array: Uint8Array, padding = 0): Binary {
+    const buffer = ByteUtils.allocate(array.byteLength + 2);
+    buffer[0] = Binary.VECTOR_TYPE.PackedBit;
+    buffer[1] = padding;
+    buffer.set(array, 2);
+    return new this(buffer, this.SUBTYPE_VECTOR);
+  }
+
+  /**
+   * Constructs a Binary representing a packed bit Vector.
+   * @param bits - The array of 1s and 0s to pack into the Binary instance
+   */
+  public static fromBits(bits: ArrayLike<number>): Binary {
+    const byteLength = (bits.length + 7) >>> 3; // ceil(bits.length / 8)
+    const bytes = new Uint8Array(byteLength + 2);
+    bytes[0] = Binary.VECTOR_TYPE.PackedBit;
+    // Padding: how many low-order bits of the final byte are left unused
+    const remainder = bits.length % 8;
+    bytes[1] = remainder === 0 ? 0 : 8 - remainder;
+
+    for (let bitOffset = 0; bitOffset < bits.length; bitOffset++) {
+      const byteOffset = bitOffset >>> 3; // floor(bitOffset / 8)
+      const bit = bits[bitOffset];
+
+      if (bit !== 0 && bit !== 1) {
+        throw new BSONError(
+          `Invalid bit value at ${bitOffset}: must be 0 or 1, found ${bits[bitOffset]}`
+        );
+      }
+
+      if (bit === 0) continue;
+
+      const shift = 7 - (bitOffset % 8);
+      bytes[byteOffset + 2] |= bit << shift;
+    }
+
+    return new this(bytes, Binary.SUBTYPE_VECTOR);
+  }
+}
+
+export function validateBinaryVector(vector: Binary): void {
+  if (vector.sub_type !== Binary.SUBTYPE_VECTOR) return;
+
+  const size = vector.position;
+
+  // Throws a BSONError when the padding byte is invalid for a **KNOWN** datatype.
+  // Unknown datatypes pass; a future version of the library must add validation for new ones.
+  const datatype = vector.buffer[0];
+
+  // NOTE: We do not enable noUncheckedIndexedAccess, so TS believes this is always a number.
+  // A Binary vector may be empty, in which case the padding is undefined;
+  // the checks below tolerate that value.
+  const padding: number | undefined = vector.buffer[1];
+
+  if (
+    (datatype === Binary.VECTOR_TYPE.Float32 || datatype === Binary.VECTOR_TYPE.Int8) &&
+    padding !== 0
+  ) {
+    throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors');
+  }
+
+  if (datatype === Binary.VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) {
+    throw new BSONError(
+      'Invalid Vector: padding must be zero for packed bit vectors that are empty'
+    );
+  }
+
+  if (datatype === Binary.VECTOR_TYPE.PackedBit && padding > 7) {
+    throw new BSONError(
+      `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}`
+    );
+  }
 }
 
 /** @public */
diff --git a/src/parser/serializer.ts b/src/parser/serializer.ts
index fbb47245..787bfa8a 100644
--- a/src/parser/serializer.ts
+++ b/src/parser/serializer.ts
@@ -1,4 +1,4 @@
-import { Binary } from '../binary';
+import { Binary, validateBinaryVector } from '../binary';
 import type { BSONSymbol, DBRef, Document, MaxKey } from '../bson';
 import type { Code } from '../code';
 import * as constants from '../constants';
@@ -495,6 +495,10 @@ function serializeBinary(buffer: Uint8Array, key: string, value: Binary, index:
     index += NumberUtils.setInt32LE(buffer, index, size);
   }
 
+  if (value.sub_type === Binary.SUBTYPE_VECTOR) {
+    validateBinaryVector(value);
+  }
+
   if (size <= 16) {
     for (let i = 0; i < size; i++) buffer[index + i] = data[i];
   } else {
diff --git a/src/utils/byte_utils.ts b/src/utils/byte_utils.ts
index f3da53fd..05e30515 100644
--- a/src/utils/byte_utils.ts
+++ b/src/utils/byte_utils.ts
@@ -39,6 +39,8 @@ export type ByteUtils = {
   encodeUTF8Into: (destination: Uint8Array, source: string, byteOffset: number) => number;
   /** Generate a Uint8Array filled with random bytes with byteLength */
   randomBytes: (byteLength: number) => Uint8Array;
+  /** Interprets `buffer` as an array of 32-bit values and swaps the byte order in-place. */
+  swap32: (buffer: Uint8Array) => Uint8Array;
 };
 
 declare const Buffer: { new (): unknown; prototype?: { _isBuffer?: boolean } } | undefined;
diff --git a/src/utils/node_byte_utils.ts b/src/utils/node_byte_utils.ts
index 7836345f..b47e723d 100644
--- a/src/utils/node_byte_utils.ts
+++ b/src/utils/node_byte_utils.ts
@@ -9,6 +9,7 @@ type NodeJsBuffer = ArrayBufferView &
     copy(target: Uint8Array, targetStart: number, sourceStart: number, sourceEnd: number): number;
     toString: (this: Uint8Array, encoding: NodeJsEncoding, start?: number, end?: number) => string;
     equals: (this: Uint8Array, other: Uint8Array) => boolean;
+    swap32: (this: NodeJsBuffer) => NodeJsBuffer;
   };
 type NodeJsBufferConstructor = Omit<Uint8ArrayConstructor, 'from'> & {
   alloc: (size: number) => NodeJsBuffer;
@@ -159,5 +160,9 @@ export const nodeJsByteUtils = {
     return nodeJsByteUtils.toLocalBufferType(buffer).write(source, byteOffset, undefined, 'utf8');
   },
 
-  randomBytes: nodejsRandomBytes
+  randomBytes: nodejsRandomBytes,
+  /** Swaps 32-bit byte order by delegating to `swap32()` on the `toLocalBufferType` result. */
+  swap32(buffer: Uint8Array): NodeJsBuffer {
+    return nodeJsByteUtils.toLocalBufferType(buffer).swap32();
+  }
 };
diff --git a/src/utils/number_utils.ts b/src/utils/number_utils.ts
index 32f6f5cc..02f4dbeb 100644
--- a/src/utils/number_utils.ts
+++ b/src/utils/number_utils.ts
@@ -13,6 +13,8 @@ const isBigEndian = FLOAT_BYTES[7] === 0;
  * A collection of functions that get or set various numeric types and bit widths from a Uint8Array.
  */
 export type NumberUtils = {
+  /** Is true if the current system is big endian. */
+  isBigEndian: boolean;
   /**
    * Parses a signed int32 at offset. Throws a `RangeError` if value is negative.
    */
@@ -35,6 +37,8 @@ export type NumberUtils = {
  * @public
  */
 export const NumberUtils: NumberUtils = {
+  isBigEndian,
+
   getNonnegativeInt32LE(source: Uint8Array, offset: number): number {
     if (source[offset + 3] > 127) {
       throw new RangeError(`Size cannot be negative at offset: ${offset}`);
diff --git a/src/utils/web_byte_utils.ts b/src/utils/web_byte_utils.ts
index 9d9c343f..336d37ed 100644
--- a/src/utils/web_byte_utils.ts
+++ b/src/utils/web_byte_utils.ts
@@ -193,5 +193,24 @@ export const webByteUtils = {
     return bytes.byteLength;
   },
 
-  randomBytes: webRandomBytes
+  randomBytes: webRandomBytes,
+
+  swap32(buffer: Uint8Array): Uint8Array {
+    if (buffer.length % 4 !== 0) {
+      throw new RangeError('Buffer size must be a multiple of 32-bits');
+    }
+    // Reverse the byte order of each 4-byte group in place, then return the same buffer
+    for (let i = 0; i < buffer.length; i += 4) {
+      const byte0 = buffer[i];
+      const byte1 = buffer[i + 1];
+      const byte2 = buffer[i + 2];
+      const byte3 = buffer[i + 3];
+      buffer[i] = byte3;
+      buffer[i + 1] = byte2;
+      buffer[i + 2] = byte1;
+      buffer[i + 3] = byte0;
+    }
+
+    return buffer;
+  }
 };
diff --git a/test/node/binary.test.ts b/test/node/binary.test.ts
index 1f94a619..20ab1cf0 100644
--- a/test/node/binary.test.ts
+++ b/test/node/binary.test.ts
@@ -1,6 +1,6 @@
 import { expect } from 'chai';
 import * as vm from 'node:vm';
-import { __isWeb__, Binary, BSON } from '../register-bson';
+import { __isWeb__, Binary, BSON, BSONError } from '../register-bson';
 import * as util from 'node:util';
 
 describe('class Binary', () => {
@@ -249,4 +249,270 @@ describe('class Binary', () => {
       expect(roundTrippedBin.bin.toJSON()).to.equal(bin.toJSON());
     });
   });
+
+  describe('sub_type vector', () => {
+    describe('datatype constants', () => {
+      it('has Int8, Float32 and PackedBit', () => {
+        expect(Binary.VECTOR_TYPE).to.have.property('Int8', 0x03);
+        expect(Binary.VECTOR_TYPE).to.have.property('Float32', 0x27);
+        expect(Binary.VECTOR_TYPE).to.have.property('PackedBit', 0x10);
+      });
+    });
+
+    describe('toInt8Array()', () => {
+      it('returns a copy of the bytes', function () {
+        const binary = Binary.fromInt8Array(new Int8Array([1, 2, 3]));
+        expect(binary.toInt8Array().buffer).to.not.equal(binary.buffer.buffer);
+      });
+
+      it('returns at the correct offset when ArrayBuffer is much larger than content', function () {
+        const space = new ArrayBuffer(400);
+        const view = new Uint8Array(space, 56, 4); // random view in a much larger buffer
+        const binary = new Binary(view, 9);
+        binary.buffer[0] = Binary.VECTOR_TYPE.Int8;
+        binary.buffer[1] = 0;
+        binary.buffer[2] = 255;
+        binary.buffer[3] = 255;
+        expect(binary.toInt8Array()).to.deep.equal(new Int8Array([-1, -1]));
+      });
+
+      it('returns Int8Array when sub_type is vector and datatype is INT8', () => {
+        const int8Array = new Int8Array([1, 2, 3]);
+        const binary = Binary.fromInt8Array(int8Array);
+        expect(binary.toInt8Array()).to.deep.equal(int8Array);
+      });
+
+      it('throws error when sub_type is not vector', () => {
+        const binary = new Binary(new Uint8Array([1, 2, 3]), Binary.SUBTYPE_BYTE_ARRAY);
+        expect(() => binary.toInt8Array()).to.throw(BSONError, 'Binary sub_type is not Vector');
+      });
+
+      it('throws error when datatype is not INT8', () => {
+        const binary = new Binary(
+          new Uint8Array([Binary.VECTOR_TYPE.Float32, 0, 1, 2, 3]),
+          Binary.SUBTYPE_VECTOR
+        );
+        expect(() => binary.toInt8Array()).to.throw(BSONError, 'Binary datatype field is not Int8');
+      });
+    });
+
+    describe('toFloat32Array()', () => {
+      it('returns a copy of the bytes', function () {
+        const binary = Binary.fromFloat32Array(new Float32Array([1.1, 2.2, 3.3]));
+        expect(binary.toFloat32Array().buffer).to.not.equal(binary.buffer.buffer);
+      });
+
+      it('returns at the correct offset when ArrayBuffer is much larger than content', function () {
+        const space = new ArrayBuffer(400);
+        const view = new Uint8Array(space, 56, 6); // random view in a much larger buffer
+        const binary = new Binary(view, 9);
+        binary.buffer[0] = Binary.VECTOR_TYPE.Float32;
+        binary.buffer[1] = 0;
+        // For reference:
+        // [ 0, 0, 128, 191 ] is -1 in little endian
+        binary.buffer[2] = 0;
+        binary.buffer[3] = 0;
+        binary.buffer[4] = 128;
+        binary.buffer[5] = 191;
+        expect(binary.toFloat32Array()).to.deep.equal(new Float32Array([-1]));
+      });
+
+      it('returns Float32Array when sub_type is vector and datatype is FLOAT32', () => {
+        const float32Array = new Float32Array([1.1, 2.2, 3.3]);
+        const binary = Binary.fromFloat32Array(float32Array);
+        expect(binary.toFloat32Array()).to.deep.equal(float32Array);
+      });
+
+      it('throws error when sub_type is not vector', () => {
+        const binary = new Binary(new Uint8Array([1, 2, 3]), Binary.SUBTYPE_BYTE_ARRAY);
+        expect(() => binary.toFloat32Array()).to.throw(BSONError, 'Binary sub_type is not Vector');
+      });
+
+      it('throws error when datatype is not FLOAT32', () => {
+        const binary = new Binary(
+          new Uint8Array([Binary.VECTOR_TYPE.Int8, 0, 1, 2, 3]),
+          Binary.SUBTYPE_VECTOR
+        );
+        expect(() => binary.toFloat32Array()).to.throw(
+          BSONError,
+          'Binary datatype field is not Float32'
+        );
+      });
+
+      it('transforms endianness correctly', () => {
+        // The expectation is that this test is run on LE and BE machines to
+        // demonstrate that on BE machines we get the same result
+        const float32Vector = new Uint8Array([
+          ...[Binary.VECTOR_TYPE.Float32, 0], // datatype, padding
+          ...[0, 0, 128, 191], // -1
+          ...[0, 0, 128, 191] // -1
+        ]);
+        const binary = new Binary(float32Vector, Binary.SUBTYPE_VECTOR);
+
+        // For reference:
+        // [ 0, 0, 128, 191 ] is -1 in little endian
+        // [ 191, 128, 0, 0 ] is -1 in big endian
+        // REGARDLESS of platform, BSON is ALWAYS little endian
+        expect(binary.toFloat32Array()).to.deep.equal(new Float32Array([-1, -1]));
+      });
+    });
+
+    describe('toBits()', () => {
+      it('returns Int8Array of bits when sub_type is vector and datatype is PACKED_BIT', () => {
+        const bits = new Int8Array([1, 0, 1, 1, 0, 0, 1, 0]);
+        const binary = Binary.fromBits(bits);
+        expect(binary.toBits()).to.deep.equal(bits);
+      });
+
+      it('returns at the correct offset when ArrayBuffer is much larger than content', function () {
+        const space = new ArrayBuffer(400);
+        const view = new Uint8Array(space, 56, 3); // random view in a much larger buffer
+        const binary = new Binary(view, 9);
+        binary.buffer[0] = Binary.VECTOR_TYPE.PackedBit;
+        binary.buffer[1] = 4;
+        binary.buffer[2] = 0xf0;
+        expect(binary.toBits()).to.deep.equal(new Int8Array([1, 1, 1, 1]));
+      });
+
+      it('throws error when sub_type is not vector', () => {
+        const binary = new Binary(new Uint8Array([1, 2, 3]), Binary.SUBTYPE_BYTE_ARRAY);
+        expect(() => binary.toBits()).to.throw(BSONError, 'Binary sub_type is not Vector');
+      });
+
+      it('throws error when datatype is not PACKED_BIT', () => {
+        const binary = new Binary(
+          new Uint8Array([Binary.VECTOR_TYPE.Int8, 0, 1, 2, 3]),
+          Binary.SUBTYPE_VECTOR
+        );
+        expect(() => binary.toBits()).to.throw(
+          BSONError,
+          'Binary datatype field is not packed bit'
+        );
+      });
+    });
+
+    describe('toPackedBits()', () => {
+      it('returns Uint8Array of packed bits when sub_type is vector and datatype is PACKED_BIT', () => {
+        const bits = new Uint8Array([127, 8]);
+        const binary = Binary.fromPackedBits(bits, 3);
+        expect(binary.toPackedBits()).to.deep.equal(bits);
+        expect(binary.toBits()).to.deep.equal(
+          new Int8Array([0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1])
+        );
+      });
+
+      it('returns at the correct offset when ArrayBuffer is much larger than content', function () {
+        const space = new ArrayBuffer(400);
+        const view = new Uint8Array(space, 56, 3); // random view in a much larger buffer
+        const binary = new Binary(view, 9);
+        binary.buffer[0] = Binary.VECTOR_TYPE.PackedBit;
+        binary.buffer[1] = 4;
+        binary.buffer[2] = 0xf0;
+        expect(binary.toPackedBits()).to.deep.equal(new Uint8Array([0xf0]));
+      });
+
+      it('throws error when sub_type is not vector', () => {
+        const binary = new Binary(new Uint8Array([1, 2, 3]), Binary.SUBTYPE_BYTE_ARRAY);
+        expect(() => binary.toPackedBits()).to.throw(BSONError, 'Binary sub_type is not Vector');
+      });
+
+      it('throws error when datatype is not PACKED_BIT', () => {
+        const binary = new Binary(
+          new Uint8Array([Binary.VECTOR_TYPE.Int8, 0, 1, 2, 3]),
+          Binary.SUBTYPE_VECTOR
+        );
+        expect(() => binary.toPackedBits()).to.throw(
+          BSONError,
+          'Binary datatype field is not packed bit'
+        );
+      });
+    });
+
+    describe('fromInt8Array()', () => {
+      it('creates Binary instance from Int8Array', () => {
+        const int8Array = new Int8Array([1, 2, 3]);
+        const binary = Binary.fromInt8Array(int8Array);
+        expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Int8);
+        expect(binary.toInt8Array()).to.deep.equal(int8Array);
+      });
+
+      it('creates empty Binary instance when Int8Array is empty', () => {
+        const binary = Binary.fromInt8Array(new Int8Array(0));
+        expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Int8);
+        expect(binary.buffer[1]).to.equal(0);
+        expect(binary.toInt8Array()).to.deep.equal(new Int8Array(0));
+      });
+    });
+
+    describe('fromFloat32Array()', () => {
+      it('creates Binary instance from Float32Array', () => {
+        const float32Array = new Float32Array([1.1, 2.2, 3.3]);
+        const binary = Binary.fromFloat32Array(float32Array);
+        expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Float32);
+        expect(binary.toFloat32Array()).to.deep.equal(float32Array);
+      });
+
+      it('creates empty Binary instance when Float32Array is empty', () => {
+        const binary = Binary.fromFloat32Array(new Float32Array(0));
+        expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Float32);
+        expect(binary.buffer[1]).to.equal(0);
+        expect(binary.toFloat32Array()).to.deep.equal(new Float32Array(0));
+      });
+
+      it('transforms endianness correctly', () => {
+        // The expectation is that this test is run on LE and BE machines to
+        // demonstrate that on BE machines we get the same result
+        const float32Array = new Float32Array([-1, -1]);
+        const binary = Binary.fromFloat32Array(float32Array);
+        expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Float32);
+        expect(binary.buffer[1]).to.equal(0);
+
+        // For reference:
+        // [ 0, 0, 128, 191 ] is -1 in little endian
+        // [ 191, 128, 0, 0 ] is -1 in big endian
+        // REGARDLESS of platform, BSON is ALWAYS little endian
+        expect(Array.from(binary.buffer.subarray(2))).to.deep.equal([
+          ...[0, 0, 128, 191], // -1
+          ...[0, 0, 128, 191] // -1
+        ]);
+      });
+    });
+
+    describe('fromPackedBits()', () => {
+      it('creates Binary instance from packed bits', () => {
+        const bits = new Uint8Array([127, 8]);
+        const binary = Binary.fromPackedBits(bits, 3);
+        expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.PackedBit);
+        expect(binary.buffer[1]).to.equal(3);
+        expect(binary.buffer.subarray(2)).to.deep.equal(bits);
+      });
+
+      it('creates empty Binary instance when bits are empty', () => {
+        const binary = Binary.fromPackedBits(new Uint8Array(0));
+        expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.PackedBit);
+        expect(binary.buffer[1]).to.equal(0);
+        expect(binary.toPackedBits()).to.deep.equal(new Uint8Array(0));
+      });
+    });
+
+    describe('fromBits()', () => {
+      it('creates Binary instance from bits', () => {
+        const bits = new Int8Array([1, 0, 1, 1, 0, 0, 1, 0]);
+        const binary = Binary.fromBits(bits);
+        expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.PackedBit);
+        expect(binary.toBits()).to.deep.equal(bits);
+      });
+
+      it('creates empty Binary instance when bits are empty', () => {
+        const binary = Binary.fromBits(new Int8Array(0));
+        expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.PackedBit);
+        expect(binary.buffer[1]).to.equal(0);
+        expect(binary.toBits()).to.deep.equal(new Int8Array(0));
+      });
+
+      it('throws when values are not 1 or 0', () => {
+        expect(() => Binary.fromBits([1, 0, 2])).to.throw(BSONError, /must be 0 or 1/);
+      });
+    });
+  });
 });
diff --git a/test/node/bson_binary_vector.spec.test.ts b/test/node/bson_binary_vector.spec.test.ts
index aa40fb36..87f573ab 100644
--- a/test/node/bson_binary_vector.spec.test.ts
+++ b/test/node/bson_binary_vector.spec.test.ts
@@ -5,14 +5,6 @@ import { expect } from 'chai';
 
 const { toHex, fromHex } = BSON.onDemand.ByteUtils;
 
-const FLOAT = new Float64Array(1);
-const FLOAT_BYTES = new Uint8Array(FLOAT.buffer, 0, 8);
-
-FLOAT[0] = -1;
-// Little endian [0, 0, 0, 0, 0, 0,  240, 191]
-// Big endian    [191, 240, 0, 0, 0, 0, 0, 0]
-const isBigEndian = FLOAT_BYTES[7] === 0;
-
 type VectorHexType = '0x03' | '0x27' | '0x10';
 type VectorTest = {
   description: string;
@@ -24,36 +16,6 @@ type VectorTest = {
 };
 type VectorSuite = { description: string; test_key: string; tests: VectorTest[] };
 
-function validateVector(vector: Binary): void {
-  const VECTOR_TYPE = Object.freeze({
-    Int8: 0x03,
-    Float32: 0x27,
-    PackedBit: 0x10
-  } as const);
-
-  if (vector.sub_type !== 9) return;
-
-  const size = vector.position;
-  const d_type = vector.buffer[0] ?? 0;
-  const padding = vector.buffer[1] ?? 0;
-
-  if ((d_type === VECTOR_TYPE.Float32 || d_type === VECTOR_TYPE.Int8) && padding !== 0) {
-    throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors');
-  }
-
-  if (d_type === VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) {
-    throw new BSONError(
-      'Invalid Vector: padding must be zero for packed bit vectors that are empty'
-    );
-  }
-
-  if (d_type === VECTOR_TYPE.PackedBit && padding > 7) {
-    throw new BSONError(
-      `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}`
-    );
-  }
-}
-
 function fixFloats(f: string | number): number {
   if (typeof f === 'number') {
     return f;
@@ -90,31 +52,15 @@ function fixBits(f: number | string): number {
 function make(vector: (number | string)[], dtype_hex: VectorHexType, padding?: number): Binary {
   let binary: Binary;
   switch (dtype_hex) {
-    case '0x10': /* packed_bit */
-    case '0x03': /* int8 */ {
-      const array = new Int8Array(vector.map(dtype_hex === '0x03' /* int8 */ ? fixInt8s : fixBits));
-      const buffer = new Uint8Array(array.byteLength + 2);
-      buffer.set(new Uint8Array(array.buffer), 2);
-      binary = new Binary(buffer, 9);
+    case '0x10' /* packed_bit */:
+      binary = Binary.fromPackedBits(new Uint8Array(vector.map(fixBits)), padding);
       break;
-    }
-
-    case '0x27': /* float32 */ {
-      const array = new Float32Array(vector.map(fixFloats));
-      const buffer = new Uint8Array(array.byteLength + 2);
-      if (isBigEndian) {
-        for (let i = 0; i < array.length; i++) {
-          const bytes = new Uint8Array(array.buffer, i * 4, 4);
-          bytes.reverse();
-          buffer.set(bytes, i * 4 + 2);
-        }
-      } else {
-        buffer.set(new Uint8Array(array.buffer), 2);
-      }
-      binary = new Binary(buffer, 9);
+    case '0x03' /* int8 */:
+      binary = Binary.fromInt8Array(new Int8Array(vector.map(fixInt8s)));
+      break;
+    case '0x27' /* float32 */:
+      binary = Binary.fromFloat32Array(new Float32Array(vector.map(fixFloats)));
       break;
-    }
-
     default:
       throw new Error(`Unknown dtype_hex: ${dtype_hex}`);
   }
@@ -206,8 +152,6 @@ describe('BSON Binary Vector spec tests', () => {
             try {
               const bin = make(test.vector, test.dtype_hex, test.padding);
               BSON.serialize({ bin });
-              // TODO(NODE-6537): The following validation MUST be a part of serialize
-              validateVector(bin);
             } catch (error) {
               thrownError = error;
             }
@@ -229,8 +173,6 @@ describe('BSON Binary Vector spec tests', () => {
             try {
               const bin = make(test.vector, test.dtype_hex, test.padding);
               BSON.EJSON.stringify({ bin });
-              // TODO(NODE-6537): The following validation MUST be a part of stringify
-              validateVector(bin);
             } catch (error) {
               thrownError = error;
             }
diff --git a/test/node/byte_utils.test.ts b/test/node/byte_utils.test.ts
index 67a4721f..df1fed0c 100644
--- a/test/node/byte_utils.test.ts
+++ b/test/node/byte_utils.test.ts
@@ -12,7 +12,7 @@ import { utf8WebPlatformSpecTests } from './data/utf8_wpt_error_cases';
 
 type ByteUtilTest<K extends keyof ByteUtils> = {
   name: string;
-  inputs: Parameters<ByteUtils[K]>;
+  inputs: Parameters<ByteUtils[K]> | (() => Parameters<ByteUtils[K]>);
   expectation: (result: {
     web: boolean;
     output: ReturnType<ByteUtils[K]> | null;
@@ -500,6 +500,23 @@ const randomBytesTests: ByteUtilTest<'randomBytes'>[] = [
     }
   }
 ];
+const swap32Tests: ByteUtilTest<'swap32'>[] = [
+  {
+    name: 'swaps byte order in-place',
+    inputs: () => [Buffer.from([1, 2, 3, 4, 5, 6, 7, 8])],
+    expectation({ output, error }) {
+      expect(error).to.be.null;
+      expect(output).to.deep.equal(Buffer.from([4, 3, 2, 1, 8, 7, 6, 5]));
+    }
+  },
+  {
+    name: 'throws if buffer is not a multiple of 4 bytes',
+    inputs: [Buffer.from([1, 2, 3])],
+    expectation({ error }) {
+      expect(error).to.be.instanceOf(RangeError);
+    }
+  }
+];
 
 const utils = new Map([
   ['nodeJsByteUtils', nodeJsByteUtils],
@@ -520,7 +537,8 @@ const table = new Map<keyof ByteUtils, ByteUtilTest<keyof ByteUtils>[]>([
   ['encodeUTF8Into', fromUTF8Tests],
   ['toUTF8', toUTF8Tests],
   ['utf8ByteLength', utf8ByteLengthTests],
-  ['randomBytes', randomBytesTests]
+  ['randomBytes', randomBytesTests],
+  ['swap32', swap32Tests]
 ]);
 
 describe('ByteUtils', () => {
@@ -790,7 +808,10 @@ describe('ByteUtils', () => {
             let error = null;
 
             try {
-              output = byteUtils[utility].call(null, ...test.inputs);
+              output = byteUtils[utility].call(
+                null,
+                ...(typeof test.inputs === 'function' ? test.inputs() : test.inputs)
+              );
             } catch (thrownError) {
               error = thrownError;
             }