From d7bdcec04349af697ead5655d14c4494f307b6a1 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Mon, 18 Nov 2024 18:05:52 -0500 Subject: [PATCH] feat(NODE-6537): add support for binary vectors (#730) --- .evergreen/run-big-endian-test.sh | 5 - etc/Dockerfile | 10 + etc/run-big-endian-test.sh | 22 ++ src/binary.ts | 218 ++++++++++++++++++ src/parser/serializer.ts | 6 +- src/utils/byte_utils.ts | 2 + src/utils/node_byte_utils.ts | 7 +- src/utils/number_utils.ts | 4 + src/utils/web_byte_utils.ts | 21 +- test/node/binary.test.ts | 268 +++++++++++++++++++++- test/node/bson_binary_vector.spec.test.ts | 72 +----- test/node/byte_utils.test.ts | 27 ++- 12 files changed, 585 insertions(+), 77 deletions(-) delete mode 100644 .evergreen/run-big-endian-test.sh create mode 100644 etc/Dockerfile create mode 100644 etc/run-big-endian-test.sh diff --git a/.evergreen/run-big-endian-test.sh b/.evergreen/run-big-endian-test.sh deleted file mode 100644 index a1cc8240..00000000 --- a/.evergreen/run-big-endian-test.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash - -source $DRIVERS_TOOLS/.evergreen/init-node-and-npm-env.sh - -npx mocha test/s390x/big_endian.test.ts diff --git a/etc/Dockerfile b/etc/Dockerfile new file mode 100644 index 00000000..3ceb2a50 --- /dev/null +++ b/etc/Dockerfile @@ -0,0 +1,10 @@ +FROM node:22 AS build + +WORKDIR /bson +COPY . . + +RUN rm -rf node_modules && npm install && npm test + +FROM scratch + +COPY --from=build /bson/docs/ / diff --git a/etc/run-big-endian-test.sh b/etc/run-big-endian-test.sh new file mode 100644 index 00000000..775cc6ee --- /dev/null +++ b/etc/run-big-endian-test.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +# At the time of writing, this script is not used in CI, +# but can be used to locally iterate on big endian bugs. +# buildx requires an output, so I put docs which should be a no-op. 
+ +set -o errexit +set -o nounset +set -o pipefail +set -o xtrace + +# If you get an error you may have an outdated buildkit version +# Try running this: +# docker buildx rm builder && docker buildx create --name builder --bootstrap --use + +docker buildx build \ + --progress=plain \ + --platform linux/s390x \ + --build-arg="NODE_ARCH=s390x" \ + -f ./etc/Dockerfile \ + --output type=local,dest=./docs,platform-split=false \ + . diff --git a/src/binary.ts b/src/binary.ts index d3b496c3..1fe09805 100644 --- a/src/binary.ts +++ b/src/binary.ts @@ -4,6 +4,7 @@ import { BSONError } from './error'; import { BSON_BINARY_SUBTYPE_UUID_NEW } from './constants'; import { ByteUtils } from './utils/byte_utils'; import { BSONValue } from './bson_value'; +import { NumberUtils } from './utils/number_utils'; /** @public */ export type BinarySequence = Uint8Array | number[]; @@ -58,9 +59,18 @@ export class Binary extends BSONValue { static readonly SUBTYPE_COLUMN = 7; /** Sensitive BSON type */ static readonly SUBTYPE_SENSITIVE = 8; + /** Vector BSON type */ + static readonly SUBTYPE_VECTOR = 9; /** User BSON type */ static readonly SUBTYPE_USER_DEFINED = 128; + /** datatype of a Binary Vector (subtype: 9) */ + static readonly VECTOR_TYPE = Object.freeze({ + Int8: 0x03, + Float32: 0x27, + PackedBit: 0x10 + } as const); + /** * The bytes of the Binary value. 
* @@ -238,6 +248,11 @@ export class Binary extends BSONValue { /** @internal */ toExtendedJSON(options?: EJSONOptions): BinaryExtendedLegacy | BinaryExtended { options = options || {}; + + if (this.sub_type === Binary.SUBTYPE_VECTOR) { + validateBinaryVector(this); + } + const base64String = ByteUtils.toBase64(this.buffer); const subType = Number(this.sub_type).toString(16); @@ -310,6 +325,209 @@ export class Binary extends BSONValue { const subTypeArg = inspect(this.sub_type, options); return `Binary.createFromBase64(${base64Arg}, ${subTypeArg})`; } + + /** + * If this Binary represents an Int8 Vector (`binary.buffer[0] === Binary.VECTOR_TYPE.Int8`), + * returns a copy of the bytes in a new Int8Array. + * + * If the Binary is not a Vector, or the datatype is not Int8, an error is thrown. + */ + public toInt8Array(): Int8Array { + if (this.sub_type !== Binary.SUBTYPE_VECTOR) { + throw new BSONError('Binary sub_type is not Vector'); + } + + if (this.buffer[0] !== Binary.VECTOR_TYPE.Int8) { + throw new BSONError('Binary datatype field is not Int8'); + } + + return new Int8Array( + this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position) + ); + } + + /** + * If this Binary represents a Float32 Vector (`binary.buffer[0] === Binary.VECTOR_TYPE.Float32`), + * returns a copy of the bytes in a new Float32Array. + * + * If the Binary is not a Vector, or the datatype is not Float32, an error is thrown. 
+ */ + public toFloat32Array(): Float32Array { + if (this.sub_type !== Binary.SUBTYPE_VECTOR) { + throw new BSONError('Binary sub_type is not Vector'); + } + + if (this.buffer[0] !== Binary.VECTOR_TYPE.Float32) { + throw new BSONError('Binary datatype field is not Float32'); + } + + const floatBytes = new Uint8Array( + this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position) + ); + + if (NumberUtils.isBigEndian) ByteUtils.swap32(floatBytes); + + return new Float32Array(floatBytes.buffer); + } + + /** + * If this Binary represents a packed bit Vector (`binary.buffer[0] === Binary.VECTOR_TYPE.PackedBit`), + * returns a copy of the bytes that are packed bits. + * + * Use `toBits` to get the unpacked bits. + * + * If the Binary is not a Vector, or the datatype is not PackedBit, an error is thrown. + */ + public toPackedBits(): Uint8Array { + if (this.sub_type !== Binary.SUBTYPE_VECTOR) { + throw new BSONError('Binary sub_type is not Vector'); + } + + if (this.buffer[0] !== Binary.VECTOR_TYPE.PackedBit) { + throw new BSONError('Binary datatype field is not packed bit'); + } + + return new Uint8Array( + this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position) + ); + } + + /** + * If this Binary represents a Packed bit Vector (`binary.buffer[0] === Binary.VECTOR_TYPE.PackedBit`), + * returns a copy of the bits unpacked into a new Int8Array. + * + * Use `toPackedBits` to get the bits still in packed form. + * + * If the Binary is not a Vector, or the datatype is not PackedBit, an error is thrown. 
+ */ + public toBits(): Int8Array { + if (this.sub_type !== Binary.SUBTYPE_VECTOR) { + throw new BSONError('Binary sub_type is not Vector'); + } + + if (this.buffer[0] !== Binary.VECTOR_TYPE.PackedBit) { + throw new BSONError('Binary datatype field is not packed bit'); + } + + const byteCount = this.length() - 2; + const bitCount = byteCount * 8 - this.buffer[1]; + const bits = new Int8Array(bitCount); + + for (let bitOffset = 0; bitOffset < bits.length; bitOffset++) { + const byteOffset = (bitOffset / 8) | 0; + const byte = this.buffer[byteOffset + 2]; + const shift = 7 - (bitOffset % 8); + const bit = (byte >> shift) & 1; + bits[bitOffset] = bit; + } + + return bits; + } + + /** + * Constructs a Binary representing an Int8 Vector. + * @param array - The array whose bytes are copied into the new Binary + */ + public static fromInt8Array(array: Int8Array): Binary { + const buffer = ByteUtils.allocate(array.byteLength + 2); + buffer[0] = Binary.VECTOR_TYPE.Int8; + buffer[1] = 0; + const intBytes = new Uint8Array(array.buffer, array.byteOffset, array.byteLength); + buffer.set(intBytes, 2); + return new this(buffer, this.SUBTYPE_VECTOR); + } + + /** Constructs a Binary representing a Float32 Vector. */ + public static fromFloat32Array(array: Float32Array): Binary { + const binaryBytes = ByteUtils.allocate(array.byteLength + 2); + binaryBytes[0] = Binary.VECTOR_TYPE.Float32; + binaryBytes[1] = 0; + + const floatBytes = new Uint8Array(array.buffer, array.byteOffset, array.byteLength); + binaryBytes.set(floatBytes, 2); + + if (NumberUtils.isBigEndian) ByteUtils.swap32(new Uint8Array(binaryBytes.buffer, 2)); + + return new this(binaryBytes, this.SUBTYPE_VECTOR); + } + + /** + * Constructs a Binary representing a packed bit Vector. + * + * Use `fromBits` to pack an array of 1s and 0s. 
+ */ + public static fromPackedBits(array: Uint8Array, padding = 0): Binary { + const buffer = ByteUtils.allocate(array.byteLength + 2); + buffer[0] = Binary.VECTOR_TYPE.PackedBit; + buffer[1] = padding; + buffer.set(array, 2); + return new this(buffer, this.SUBTYPE_VECTOR); + } + + /** + * Constructs a Binary representing a Packed Bit Vector. + * @param bits - The array of 1s and 0s to pack into the Binary instance + */ + public static fromBits(bits: ArrayLike<number>): Binary { + const byteLength = (bits.length + 7) >>> 3; // ceil(bits.length / 8) + const bytes = new Uint8Array(byteLength + 2); + bytes[0] = Binary.VECTOR_TYPE.PackedBit; + + const remainder = bits.length % 8; + bytes[1] = remainder === 0 ? 0 : 8 - remainder; + + for (let bitOffset = 0; bitOffset < bits.length; bitOffset++) { + const byteOffset = bitOffset >>> 3; // floor(bitOffset / 8) + const bit = bits[bitOffset]; + + if (bit !== 0 && bit !== 1) { + throw new BSONError( + `Invalid bit value at ${bitOffset}: must be 0 or 1, found ${bits[bitOffset]}` + ); + } + + if (bit === 0) continue; + + const shift = 7 - (bitOffset % 8); + bytes[byteOffset + 2] |= bit << shift; + } + + return new this(bytes, Binary.SUBTYPE_VECTOR); + } +} + +export function validateBinaryVector(vector: Binary): void { + if (vector.sub_type !== Binary.SUBTYPE_VECTOR) return; + + const size = vector.position; + + // NOTE: Validation is only applied to **KNOWN** vector types + // If a new datatype is introduced, a future version of the library will need to add validation + const datatype = vector.buffer[0]; + + // NOTE: We do not enable noUncheckedIndexedAccess so TS believes this is always number + // a Binary vector may be empty, in which case the padding is undefined + // this possible value is tolerable for our validation checks + const padding: number | undefined = vector.buffer[1]; + + if ( + (datatype === Binary.VECTOR_TYPE.Float32 || datatype === Binary.VECTOR_TYPE.Int8) && + padding !== 0 + ) { + throw new BSONError('Invalid 
Vector: padding must be zero for int8 and float32 vectors'); + } + + if (datatype === Binary.VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) { + throw new BSONError( + 'Invalid Vector: padding must be zero for packed bit vectors that are empty' + ); + } + + if (datatype === Binary.VECTOR_TYPE.PackedBit && padding > 7) { + throw new BSONError( + `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}` + ); + } } /** @public */ diff --git a/src/parser/serializer.ts b/src/parser/serializer.ts index fbb47245..787bfa8a 100644 --- a/src/parser/serializer.ts +++ b/src/parser/serializer.ts @@ -1,4 +1,4 @@ -import { Binary } from '../binary'; +import { Binary, validateBinaryVector } from '../binary'; import type { BSONSymbol, DBRef, Document, MaxKey } from '../bson'; import type { Code } from '../code'; import * as constants from '../constants'; @@ -495,6 +495,10 @@ function serializeBinary(buffer: Uint8Array, key: string, value: Binary, index: index += NumberUtils.setInt32LE(buffer, index, size); } + if (value.sub_type === Binary.SUBTYPE_VECTOR) { + validateBinaryVector(value); + } + if (size <= 16) { for (let i = 0; i < size; i++) buffer[index + i] = data[i]; } else { diff --git a/src/utils/byte_utils.ts b/src/utils/byte_utils.ts index f3da53fd..05e30515 100644 --- a/src/utils/byte_utils.ts +++ b/src/utils/byte_utils.ts @@ -39,6 +39,8 @@ export type ByteUtils = { encodeUTF8Into: (destination: Uint8Array, source: string, byteOffset: number) => number; /** Generate a Uint8Array filled with random bytes with byteLength */ randomBytes: (byteLength: number) => Uint8Array; + /** Interprets `buffer` as an array of 32-bit values and swaps the byte order in-place. 
*/ + swap32: (buffer: Uint8Array) => Uint8Array; }; declare const Buffer: { new (): unknown; prototype?: { _isBuffer?: boolean } } | undefined; diff --git a/src/utils/node_byte_utils.ts b/src/utils/node_byte_utils.ts index 7836345f..b47e723d 100644 --- a/src/utils/node_byte_utils.ts +++ b/src/utils/node_byte_utils.ts @@ -9,6 +9,7 @@ type NodeJsBuffer = ArrayBufferView & copy(target: Uint8Array, targetStart: number, sourceStart: number, sourceEnd: number): number; toString: (this: Uint8Array, encoding: NodeJsEncoding, start?: number, end?: number) => string; equals: (this: Uint8Array, other: Uint8Array) => boolean; + swap32: (this: NodeJsBuffer) => NodeJsBuffer; }; type NodeJsBufferConstructor = Omit & { alloc: (size: number) => NodeJsBuffer; @@ -159,5 +160,9 @@ export const nodeJsByteUtils = { return nodeJsByteUtils.toLocalBufferType(buffer).write(source, byteOffset, undefined, 'utf8'); }, - randomBytes: nodejsRandomBytes + randomBytes: nodejsRandomBytes, + + swap32(buffer: Uint8Array): NodeJsBuffer { + return nodeJsByteUtils.toLocalBufferType(buffer).swap32(); + } }; diff --git a/src/utils/number_utils.ts b/src/utils/number_utils.ts index 32f6f5cc..02f4dbeb 100644 --- a/src/utils/number_utils.ts +++ b/src/utils/number_utils.ts @@ -13,6 +13,8 @@ const isBigEndian = FLOAT_BYTES[7] === 0; * A collection of functions that get or set various numeric types and bit widths from a Uint8Array. */ export type NumberUtils = { + /** Is true if the current system is big endian. */ + isBigEndian: boolean; /** * Parses a signed int32 at offset. Throws a `RangeError` if value is negative. 
*/ @@ -35,6 +37,8 @@ export type NumberUtils = { * @public */ export const NumberUtils: NumberUtils = { + isBigEndian, + getNonnegativeInt32LE(source: Uint8Array, offset: number): number { if (source[offset + 3] > 127) { throw new RangeError(`Size cannot be negative at offset: ${offset}`); diff --git a/src/utils/web_byte_utils.ts b/src/utils/web_byte_utils.ts index 9d9c343f..336d37ed 100644 --- a/src/utils/web_byte_utils.ts +++ b/src/utils/web_byte_utils.ts @@ -193,5 +193,24 @@ export const webByteUtils = { return bytes.byteLength; }, - randomBytes: webRandomBytes + randomBytes: webRandomBytes, + + swap32(buffer: Uint8Array): Uint8Array { + if (buffer.length % 4 !== 0) { + throw new RangeError('Buffer size must be a multiple of 32-bits'); + } + + for (let i = 0; i < buffer.length; i += 4) { + const byte0 = buffer[i]; + const byte1 = buffer[i + 1]; + const byte2 = buffer[i + 2]; + const byte3 = buffer[i + 3]; + buffer[i] = byte3; + buffer[i + 1] = byte2; + buffer[i + 2] = byte1; + buffer[i + 3] = byte0; + } + + return buffer; + } }; diff --git a/test/node/binary.test.ts b/test/node/binary.test.ts index 1f94a619..20ab1cf0 100644 --- a/test/node/binary.test.ts +++ b/test/node/binary.test.ts @@ -1,6 +1,6 @@ import { expect } from 'chai'; import * as vm from 'node:vm'; -import { __isWeb__, Binary, BSON } from '../register-bson'; +import { __isWeb__, Binary, BSON, BSONError } from '../register-bson'; import * as util from 'node:util'; describe('class Binary', () => { @@ -249,4 +249,270 @@ describe('class Binary', () => { expect(roundTrippedBin.bin.toJSON()).to.equal(bin.toJSON()); }); }); + + describe('sub_type vector', () => { + describe('datatype constants', () => { + it('has Int8, Float32 and PackedBit', () => { + expect(Binary.VECTOR_TYPE).to.have.property('Int8', 0x03); + expect(Binary.VECTOR_TYPE).to.have.property('Float32', 0x27); + expect(Binary.VECTOR_TYPE).to.have.property('PackedBit', 0x10); + }); + }); + + describe('toInt8Array()', () => { + it('returns a 
copy of the bytes', function () { + const binary = Binary.fromInt8Array(new Int8Array([1, 2, 3])); + expect(binary.toInt8Array().buffer).to.not.equal(binary.buffer.buffer); + }); + + it('returns at the correct offset when ArrayBuffer is much larger than content', function () { + const space = new ArrayBuffer(400); + const view = new Uint8Array(space, 56, 4); // random view in a much larger buffer + const binary = new Binary(view, 9); + binary.buffer[0] = Binary.VECTOR_TYPE.Int8; + binary.buffer[1] = 0; + binary.buffer[2] = 255; + binary.buffer[3] = 255; + expect(binary.toInt8Array()).to.deep.equal(new Int8Array([-1, -1])); + }); + + it('returns Int8Array when sub_type is vector and datatype is INT8', () => { + const int8Array = new Int8Array([1, 2, 3]); + const binary = Binary.fromInt8Array(int8Array); + expect(binary.toInt8Array()).to.deep.equal(int8Array); + }); + + it('throws error when sub_type is not vector', () => { + const binary = new Binary(new Uint8Array([1, 2, 3]), Binary.SUBTYPE_BYTE_ARRAY); + expect(() => binary.toInt8Array()).to.throw(BSONError, 'Binary sub_type is not Vector'); + }); + + it('throws error when datatype is not INT8', () => { + const binary = new Binary( + new Uint8Array([Binary.VECTOR_TYPE.Float32, 0, 1, 2, 3]), + Binary.SUBTYPE_VECTOR + ); + expect(() => binary.toInt8Array()).to.throw(BSONError, 'Binary datatype field is not Int8'); + }); + }); + + describe('toFloat32Array()', () => { + it('returns a copy of the bytes', function () { + const binary = Binary.fromFloat32Array(new Float32Array([1.1, 2.2, 3.3])); + expect(binary.toFloat32Array().buffer).to.not.equal(binary.buffer.buffer); + }); + + it('returns at the correct offset when ArrayBuffer is much larger than content', function () { + const space = new ArrayBuffer(400); + const view = new Uint8Array(space, 56, 6); // random view in a much larger buffer + const binary = new Binary(view, 9); + binary.buffer[0] = Binary.VECTOR_TYPE.Float32; + binary.buffer[1] = 0; + // For 
reference: + // [ 0, 0, 128, 191 ] is -1 in little endian + binary.buffer[2] = 0; + binary.buffer[3] = 0; + binary.buffer[4] = 128; + binary.buffer[5] = 191; + expect(binary.toFloat32Array()).to.deep.equal(new Float32Array([-1])); + }); + + it('returns Float32Array when sub_type is vector and datatype is FLOAT32', () => { + const float32Array = new Float32Array([1.1, 2.2, 3.3]); + const binary = Binary.fromFloat32Array(float32Array); + expect(binary.toFloat32Array()).to.deep.equal(float32Array); + }); + + it('throws error when sub_type is not vector', () => { + const binary = new Binary(new Uint8Array([1, 2, 3]), Binary.SUBTYPE_BYTE_ARRAY); + expect(() => binary.toFloat32Array()).to.throw(BSONError, 'Binary sub_type is not Vector'); + }); + + it('throws error when datatype is not FLOAT32', () => { + const binary = new Binary( + new Uint8Array([Binary.VECTOR_TYPE.Int8, 0, 1, 2, 3]), + Binary.SUBTYPE_VECTOR + ); + expect(() => binary.toFloat32Array()).to.throw( + BSONError, + 'Binary datatype field is not Float32' + ); + }); + + it('transforms endianness correctly', () => { + // The expectation is that this test is run on LE and BE machines to + // demonstrate that on BE machines we get the same result + const float32Vector = new Uint8Array([ + ...[Binary.VECTOR_TYPE.Float32, 0], // datatype, padding + ...[0, 0, 128, 191], // -1 + ...[0, 0, 128, 191] // -1 + ]); + const binary = new Binary(float32Vector, Binary.SUBTYPE_VECTOR); + + // For reference: + // [ 0, 0, 128, 191 ] is -1 in little endian + // [ 191, 128, 0, 0 ] is -1 in big endian + // REGARDLESS of platform, BSON is ALWAYS little endian + expect(binary.toFloat32Array()).to.deep.equal(new Float32Array([-1, -1])); + }); + }); + + describe('toBits()', () => { + it('returns Int8Array of bits when sub_type is vector and datatype is PACKED_BIT', () => { + const bits = new Int8Array([1, 0, 1, 1, 0, 0, 1, 0]); + const binary = Binary.fromBits(bits); + expect(binary.toBits()).to.deep.equal(bits); + }); + + 
it('returns at the correct offset when ArrayBuffer is much larger than content', function () { + const space = new ArrayBuffer(400); + const view = new Uint8Array(space, 56, 3); // random view in a much larger buffer + const binary = new Binary(view, 9); + binary.buffer[0] = Binary.VECTOR_TYPE.PackedBit; + binary.buffer[1] = 4; + binary.buffer[2] = 0xf0; + expect(binary.toBits()).to.deep.equal(new Int8Array([1, 1, 1, 1])); + }); + + it('throws error when sub_type is not vector', () => { + const binary = new Binary(new Uint8Array([1, 2, 3]), Binary.SUBTYPE_BYTE_ARRAY); + expect(() => binary.toBits()).to.throw(BSONError, 'Binary sub_type is not Vector'); + }); + + it('throws error when datatype is not PACKED_BIT', () => { + const binary = new Binary( + new Uint8Array([Binary.VECTOR_TYPE.Int8, 0, 1, 2, 3]), + Binary.SUBTYPE_VECTOR + ); + expect(() => binary.toBits()).to.throw( + BSONError, + 'Binary datatype field is not packed bit' + ); + }); + }); + + describe('toPackedBits()', () => { + it('returns Uint8Array of packed bits when sub_type is vector and datatype is PACKED_BIT', () => { + const bits = new Uint8Array([127, 8]); + const binary = Binary.fromPackedBits(bits, 3); + expect(binary.toPackedBits()).to.deep.equal(bits); + expect(binary.toBits()).to.deep.equal( + new Int8Array([0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1]) + ); + }); + + it('returns at the correct offset when ArrayBuffer is much larger than content', function () { + const space = new ArrayBuffer(400); + const view = new Uint8Array(space, 56, 3); // random view in a much larger buffer + const binary = new Binary(view, 9); + binary.buffer[0] = Binary.VECTOR_TYPE.PackedBit; + binary.buffer[1] = 4; + binary.buffer[2] = 0xf0; + expect(binary.toPackedBits()).to.deep.equal(new Uint8Array([0xf0])); + }); + + it('throws error when sub_type is not vector', () => { + const binary = new Binary(new Uint8Array([1, 2, 3]), Binary.SUBTYPE_BYTE_ARRAY); + expect(() => binary.toPackedBits()).to.throw(BSONError, 'Binary 
sub_type is not Vector'); + }); + + it('throws error when datatype is not PACKED_BIT', () => { + const binary = new Binary( + new Uint8Array([Binary.VECTOR_TYPE.Int8, 0, 1, 2, 3]), + Binary.SUBTYPE_VECTOR + ); + expect(() => binary.toPackedBits()).to.throw( + BSONError, + 'Binary datatype field is not packed bit' + ); + }); + }); + + describe('fromInt8Array()', () => { + it('creates Binary instance from Int8Array', () => { + const int8Array = new Int8Array([1, 2, 3]); + const binary = Binary.fromInt8Array(int8Array); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Int8); + expect(binary.toInt8Array()).to.deep.equal(int8Array); + }); + + it('creates empty Binary instance when Int8Array is empty', () => { + const binary = Binary.fromInt8Array(new Int8Array(0)); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Int8); + expect(binary.buffer[1]).to.equal(0); + expect(binary.toInt8Array()).to.deep.equal(new Int8Array(0)); + }); + }); + + describe('fromFloat32Array()', () => { + it('creates Binary instance from Float32Array', () => { + const float32Array = new Float32Array([1.1, 2.2, 3.3]); + const binary = Binary.fromFloat32Array(float32Array); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Float32); + expect(binary.toFloat32Array()).to.deep.equal(float32Array); + }); + + it('creates empty Binary instance when Float32Array is empty', () => { + const binary = Binary.fromFloat32Array(new Float32Array(0)); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Float32); + expect(binary.buffer[1]).to.equal(0); + expect(binary.toFloat32Array()).to.deep.equal(new Float32Array(0)); + }); + + it('transforms endianness correctly', () => { + // The expectation is that this test is run on LE and BE machines to + // demonstrate that on BE machines we get the same result + const float32Array = new Float32Array([-1, -1]); + const binary = Binary.fromFloat32Array(float32Array); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Float32); + 
expect(binary.buffer[1]).to.equal(0); + + // For reference: + // [ 0, 0, 128, 191 ] is -1 in little endian + // [ 191, 128, 0, 0 ] is -1 in big endian + // REGARDLESS of platform, BSON is ALWAYS little endian + expect(Array.from(binary.buffer.subarray(2))).to.deep.equal([ + ...[0, 0, 128, 191], // -1 + ...[0, 0, 128, 191] // -1 + ]); + }); + }); + + describe('fromPackedBits()', () => { + it('creates Binary instance from packed bits', () => { + const bits = new Uint8Array([127, 8]); + const binary = Binary.fromPackedBits(bits, 3); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.PackedBit); + expect(binary.buffer[1]).to.equal(3); + expect(binary.buffer.subarray(2)).to.deep.equal(bits); + }); + + it('creates empty Binary instance when bits are empty', () => { + const binary = Binary.fromBits(new Int8Array(0)); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.PackedBit); + expect(binary.buffer[1]).to.equal(0); + expect(binary.toBits()).to.deep.equal(new Int8Array(0)); + }); + }); + + describe('fromBits()', () => { + it('creates Binary instance from bits', () => { + const bits = new Int8Array([1, 0, 1, 1, 0, 0, 1, 0]); + const binary = Binary.fromBits(bits); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.PackedBit); + expect(binary.toBits()).to.deep.equal(bits); + }); + + it('creates empty Binary instance when bits are empty', () => { + const binary = Binary.fromBits(new Int8Array(0)); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.PackedBit); + expect(binary.buffer[1]).to.equal(0); + expect(binary.toBits()).to.deep.equal(new Int8Array(0)); + }); + + it('throws when values are not 1 or 0', () => { + expect(() => Binary.fromBits([1, 0, 2])).to.throw(BSONError, /must be 0 or 1/); + }); + }); + }); }); diff --git a/test/node/bson_binary_vector.spec.test.ts b/test/node/bson_binary_vector.spec.test.ts index aa40fb36..87f573ab 100644 --- a/test/node/bson_binary_vector.spec.test.ts +++ b/test/node/bson_binary_vector.spec.test.ts @@ -5,14 +5,6 @@ 
import { expect } from 'chai'; const { toHex, fromHex } = BSON.onDemand.ByteUtils; -const FLOAT = new Float64Array(1); -const FLOAT_BYTES = new Uint8Array(FLOAT.buffer, 0, 8); - -FLOAT[0] = -1; -// Little endian [0, 0, 0, 0, 0, 0, 240, 191] -// Big endian [191, 240, 0, 0, 0, 0, 0, 0] -const isBigEndian = FLOAT_BYTES[7] === 0; - type VectorHexType = '0x03' | '0x27' | '0x10'; type VectorTest = { description: string; @@ -24,36 +16,6 @@ type VectorTest = { }; type VectorSuite = { description: string; test_key: string; tests: VectorTest[] }; -function validateVector(vector: Binary): void { - const VECTOR_TYPE = Object.freeze({ - Int8: 0x03, - Float32: 0x27, - PackedBit: 0x10 - } as const); - - if (vector.sub_type !== 9) return; - - const size = vector.position; - const d_type = vector.buffer[0] ?? 0; - const padding = vector.buffer[1] ?? 0; - - if ((d_type === VECTOR_TYPE.Float32 || d_type === VECTOR_TYPE.Int8) && padding !== 0) { - throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors'); - } - - if (d_type === VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) { - throw new BSONError( - 'Invalid Vector: padding must be zero for packed bit vectors that are empty' - ); - } - - if (d_type === VECTOR_TYPE.PackedBit && padding > 7) { - throw new BSONError( - `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}` - ); - } -} - function fixFloats(f: string | number): number { if (typeof f === 'number') { return f; @@ -90,31 +52,15 @@ function fixBits(f: number | string): number { function make(vector: (number | string)[], dtype_hex: VectorHexType, padding?: number): Binary { let binary: Binary; switch (dtype_hex) { - case '0x10': /* packed_bit */ - case '0x03': /* int8 */ { - const array = new Int8Array(vector.map(dtype_hex === '0x03' /* int8 */ ? 
fixInt8s : fixBits)); - const buffer = new Uint8Array(array.byteLength + 2); - buffer.set(new Uint8Array(array.buffer), 2); - binary = new Binary(buffer, 9); + case '0x10' /* packed_bit */: + binary = Binary.fromPackedBits(new Uint8Array(vector.map(fixBits)), padding); break; - } - - case '0x27': /* float32 */ { - const array = new Float32Array(vector.map(fixFloats)); - const buffer = new Uint8Array(array.byteLength + 2); - if (isBigEndian) { - for (let i = 0; i < array.length; i++) { - const bytes = new Uint8Array(array.buffer, i * 4, 4); - bytes.reverse(); - buffer.set(bytes, i * 4 + 2); - } - } else { - buffer.set(new Uint8Array(array.buffer), 2); - } - binary = new Binary(buffer, 9); + case '0x03' /* int8 */: + binary = Binary.fromInt8Array(new Int8Array(vector.map(fixInt8s))); + break; + case '0x27' /* float32 */: + binary = Binary.fromFloat32Array(new Float32Array(vector.map(fixFloats))); break; - } - default: throw new Error(`Unknown dtype_hex: ${dtype_hex}`); } @@ -206,8 +152,6 @@ describe('BSON Binary Vector spec tests', () => { try { const bin = make(test.vector, test.dtype_hex, test.padding); BSON.serialize({ bin }); - // TODO(NODE-6537): The following validation MUST be a part of serialize - validateVector(bin); } catch (error) { thrownError = error; } @@ -229,8 +173,6 @@ describe('BSON Binary Vector spec tests', () => { try { const bin = make(test.vector, test.dtype_hex, test.padding); BSON.EJSON.stringify({ bin }); - // TODO(NODE-6537): The following validation MUST be a part of stringify - validateVector(bin); } catch (error) { thrownError = error; } diff --git a/test/node/byte_utils.test.ts b/test/node/byte_utils.test.ts index 67a4721f..df1fed0c 100644 --- a/test/node/byte_utils.test.ts +++ b/test/node/byte_utils.test.ts @@ -12,7 +12,7 @@ import { utf8WebPlatformSpecTests } from './data/utf8_wpt_error_cases'; type ByteUtilTest = { name: string; - inputs: Parameters; + inputs: Parameters | (() => Parameters); expectation: (result: { web: boolean; 
output: ReturnType | null; @@ -500,6 +500,23 @@ const randomBytesTests: ByteUtilTest<'randomBytes'>[] = [ } } ]; +const swap32Tests: ByteUtilTest<'swap32'>[] = [ + { + name: 'swaps byte order in-place', + inputs: () => [Buffer.from([1, 2, 3, 4, 5, 6, 7, 8])], + expectation({ output, error }) { + expect(error).to.be.null; + expect(output).to.deep.equal(Buffer.from([4, 3, 2, 1, 8, 7, 6, 5])); + } + }, + { + name: 'throws if buffer is not a multiple of 4 bytes', + inputs: [Buffer.from([1, 2, 3])], + expectation({ error }) { + expect(error).to.be.instanceOf(RangeError); + } + } +]; const utils = new Map([ ['nodeJsByteUtils', nodeJsByteUtils], @@ -520,7 +537,8 @@ const table = new Map[]>([ ['encodeUTF8Into', fromUTF8Tests], ['toUTF8', toUTF8Tests], ['utf8ByteLength', utf8ByteLengthTests], - ['randomBytes', randomBytesTests] + ['randomBytes', randomBytesTests], + ['swap32', swap32Tests] ]); describe('ByteUtils', () => { @@ -790,7 +808,10 @@ describe('ByteUtils', () => { let error = null; try { - output = byteUtils[utility].call(null, ...test.inputs); + output = byteUtils[utility].call( + null, + ...(typeof test.inputs === 'function' ? test.inputs() : test.inputs) + ); } catch (thrownError) { error = thrownError; }