diff --git a/test/node/bson_binary_vector.spec.test.ts b/test/node/bson_binary_vector.spec.test.ts
new file mode 100644
index 00000000..67837305
--- /dev/null
+++ b/test/node/bson_binary_vector.spec.test.ts
@@ -0,0 +1,232 @@
+import * as fs from 'fs';
+import * as path from 'path';
+import { BSON, BSONError, Binary } from '../register-bson';
+import { expect } from 'chai';
+
+const { toHex, fromHex } = BSON.onDemand.ByteUtils;
+
+// Probe the host byte order from the in-memory layout of the float64 value -1.
+const FLOAT = new Float64Array(1);
+const FLOAT_BYTES = new Uint8Array(FLOAT.buffer, 0, 8);
+
+FLOAT[0] = -1;
+// Little endian [0, 0, 0, 0, 0, 0, 240, 191]
+// Big endian [191, 240, 0, 0, 0, 0, 0, 0]
+const isBigEndian = FLOAT_BYTES[7] === 0;
+
+/** Directory holding the JSON spec fixtures; each file is one suite. */
+const SPEC_DIR = path.join(__dirname, 'specs/bson-binary-vector');
+
+/** dtype bytes (first byte of a subtype 9 payload) checked by validateVector. */
+const DTYPE = { float32: 0x27, int8: 0x03, bit: 0x10 } as const;
+
+type DTypeAlias = 'INT8' | 'FLOAT32' | 'PACKED_BIT';
+type VectorTest = {
+  description: string;
+  vector: (number | string)[];
+  valid: boolean;
+  dtype_hex: string;
+  dtype_alias: DTypeAlias;
+  padding: number;
+  canonical_bson?: string;
+};
+type VectorSuite = { description: string; test_key: string; tests: VectorTest[] };
+
+/**
+ * Converts one FLOAT32 fixture entry to a number.
+ * The fixtures spell the infinities as the strings 'inf' and '-inf'.
+ * @throws Error for any other string.
+ */
+function fixFloats(f: string | number): number {
+  if (typeof f === 'number') {
+    return f;
+  }
+  if (f === 'inf') {
+    return Infinity;
+  }
+  if (f === '-inf') {
+    return -Infinity;
+  }
+  throw new Error(`unknown float value: ${f}`);
+}
+
+/**
+ * Validates one INT8 fixture entry and returns it unchanged.
+ * @throws Error when the entry is not a number.
+ * @throws BSONError when the entry is not an integer in [-128, 127].
+ */
+function fixInt8s(f: number | string): number {
+  if (typeof f !== 'number') throw new Error('unexpected test data');
+
+  // Fractional values must be rejected too: a typed array would silently truncate them.
+  if (!Number.isInteger(f) || f < -128 || f > 127) {
+    // TODO(NODE-6537): this must be a part of the final "make a binary from" API.
+    throw new BSONError(`int8 out of range: ${f}`);
+  }
+  return f;
+}
+
+/**
+ * Validates one PACKED_BIT fixture entry (a whole byte) and returns it unchanged.
+ * @throws Error when the entry is not a number.
+ * @throws BSONError when the entry is not an integer in [0, 255].
+ */
+function fixBits(f: number | string): number {
+  if (typeof f !== 'number') throw new Error('unexpected test data');
+
+  if (f > 255 || f < 0 || !Number.isSafeInteger(f)) {
+    // TODO(NODE-6537): this must be a part of the final "make a binary from" API.
+    throw new BSONError(`bit out of range: ${f}`);
+  }
+  return f;
+}
+
+/**
+ * TODO(NODE-6537): Replace the following with final "make a binary from" API
+ *
+ * Builds a subtype 9 Binary laid out as [dtype byte, padding byte, ...element bytes].
+ * FLOAT32 elements are written little endian regardless of host byte order.
+ * @throws BSONError when an INT8 or PACKED_BIT element is out of range or fractional.
+ * @throws Error when the fixture data or dtype_alias is not recognized.
+ */
+function VECTOR_TO_BINARY(
+  vector: (number | string)[],
+  dtype_hex: string,
+  dtype_alias: DTypeAlias,
+  padding: number
+): Binary {
+  switch (dtype_alias) {
+    case 'PACKED_BIT':
+    case 'INT8': {
+      // Int8Array wraps 128..255 to negatives, but the bytes copied out below are unchanged.
+      const array = new Int8Array(vector.map(dtype_alias === 'INT8' ? fixInt8s : fixBits));
+      const buffer = new Uint8Array(array.byteLength + 2);
+      buffer[0] = +dtype_hex;
+      buffer[1] = padding;
+      buffer.set(new Uint8Array(array.buffer), 2);
+      return new Binary(buffer, 9);
+    }
+
+    case 'FLOAT32': {
+      const array = new Float32Array(vector.map(fixFloats));
+      const buffer = new Uint8Array(array.byteLength + 2);
+      buffer[0] = +dtype_hex;
+      buffer[1] = padding;
+      if (isBigEndian) {
+        // Reverse each 4-byte element in place so the copy is little endian.
+        for (let i = 0; i < array.length; i++) {
+          const bytes = new Uint8Array(array.buffer, i * 4, 4);
+          bytes.reverse();
+          buffer.set(bytes, i * 4 + 2);
+        }
+      } else {
+        buffer.set(new Uint8Array(array.buffer), 2);
+      }
+      return new Binary(buffer, 9);
+    }
+
+    default:
+      throw new Error(`Unknown dtype_alias: ${dtype_alias}`);
+  }
+}
+
+/**
+ * TODO(NODE-6537): Replace the following with final "make a binary from" API.
+ *
+ * Checks the padding byte of a subtype 9 Binary against its dtype byte.
+ * Binaries of any other subtype are ignored.
+ * @throws BSONError when the padding is not allowed for the dtype.
+ */
+function validateVector(binary: Binary): void {
+  if (binary.sub_type !== 0x09) return;
+
+  const size = binary.position;
+  const data = binary.buffer;
+  const d_type = data[0] ?? 0;
+  const padding = data[1] ?? 0;
+
+  if ((d_type === DTYPE.float32 || d_type === DTYPE.int8) && padding !== 0) {
+    throw new BSONError('padding must be zero for int8 and float32 vectors');
+  }
+
+  // A size of 2 means only the dtype and padding bytes are present.
+  if (d_type === DTYPE.bit && padding !== 0 && size === 2) {
+    throw new BSONError('padding must be zero for packed bit vectors that are empty');
+  }
+
+  if (d_type === DTYPE.bit && padding > 7) {
+    throw new BSONError(`padding must be a value between 0 and 7. found: ${data[1]}`);
+  }
+}
+
+describe('BSON Binary Vector spec tests', () => {
+  const tests: Record<string, VectorSuite> = Object.create(null);
+
+  for (const file of fs.readdirSync(SPEC_DIR)) {
+    // Only JSON fixtures can be parsed; skip anything else that lands in the directory.
+    if (path.extname(file) !== '.json') continue;
+    tests[file.split('.')[0]] = JSON.parse(fs.readFileSync(path.join(SPEC_DIR, file), 'utf8'));
+  }
+
+  for (const [suiteName, suite] of Object.entries(tests)) {
+    describe(suiteName, function () {
+      const valid = suite.tests.filter(t => t.valid);
+      const invalid = suite.tests.filter(t => !t.valid);
+      describe('valid', function () {
+        /**
+         * 1. encode a document from the numeric values, dtype, and padding, along with the "test_key", and assert this matches the canonical_bson string.
+         * 2. decode the canonical_bson into its binary form, and then assert that the numeric values, dtype, and padding all match those provided in the JSON.
+         *
+         * > Note: For floating point number types, exact numerical matches may not be possible.
+         * > Drivers that natively support the floating-point type being tested (e.g., when testing float32 vector values in a driver that natively supports float32),
+         * > MUST assert that the input float array is the same after encoding and decoding.
+         */
+        for (const test of valid) {
+          it(`encode ${test.description}`, function () {
+            const bin = VECTOR_TO_BINARY(
+              test.vector,
+              test.dtype_hex,
+              test.dtype_alias,
+              test.padding
+            );
+
+            const buffer = BSON.serialize({ [suite.test_key]: bin });
+            expect(toHex(buffer)).to.equal(test.canonical_bson!.toLowerCase());
+          });
+
+          it(`decode ${test.description}`, function () {
+            const canonical_bson = fromHex(test.canonical_bson!.toLowerCase());
+            const doc = BSON.deserialize(canonical_bson);
+
+            expect(doc[suite.test_key].sub_type).to.equal(0x09);
+            expect(doc[suite.test_key].buffer[0]).to.equal(+test.dtype_hex);
+            expect(doc[suite.test_key].buffer[1]).to.equal(test.padding);
+          });
+        }
+      });
+
+      describe('invalid', function () {
+        /**
+         * To prove correct in an invalid case (valid:false),
+         * one MUST raise an exception when attempting to encode
+         * a document from the numeric values, dtype, and padding.
+         */
+        for (const test of invalid) {
+          it(test.description, function () {
+            expect(() => {
+              // Errors are thrown when creating the binary because of invalid values in the vector.
+              const binary = VECTOR_TO_BINARY(
+                test.vector,
+                test.dtype_hex,
+                test.dtype_alias,
+                test.padding
+              );
+              // Padding errors are only detectable once the binary has been built.
+              validateVector(binary);
+            }).to.throw(BSONError);
+          });
+        }
+      });
+    });
+  }
+});
diff --git a/test/node/specs/bson-binary-vector/float32.json b/test/node/specs/bson-binary-vector/float32.json
new file mode 100644
index 00000000..872c4353
--- /dev/null
+++ b/test/node/specs/bson-binary-vector/float32.json
@@ -0,0 +1,51 @@
+{
+  "description": "Tests of Binary subtype 9, Vectors, with dtype FLOAT32",
+  "test_key": "vector",
+  "tests": [
+    {
+      "description": "Simple Vector FLOAT32",
+      "valid": true,
+      "vector": [127.0, 7.0],
+      "dtype_hex": "0x27",
+      "dtype_alias": "FLOAT32",
+      "padding": 0,
+      "canonical_bson": "1C00000005766563746F72000A0000000927000000FE420000E04000"
+    },
+    {
+      "description": "Vector with decimals and negative value FLOAT32",
+      "valid": true,
+      "vector": [127.7, -7.7],
+      "dtype_hex": "0x27",
+      "dtype_alias": "FLOAT32",
+      "padding": 0,
+      "canonical_bson": "1C00000005766563746F72000A0000000927006666FF426666F6C000"
+    },
+    {
+      "description": "Empty Vector FLOAT32",
+      "valid": true,
+      "vector": [],
+      "dtype_hex": "0x27",
+      "dtype_alias": "FLOAT32",
+      "padding": 0,
+      "canonical_bson": "1400000005766563746F72000200000009270000"
+    },
+    {
+      "description": "Infinity Vector FLOAT32",
+      "valid": true,
+      "vector": ["-inf", 0.0, "inf"],
+      "dtype_hex": "0x27",
+      "dtype_alias": "FLOAT32",
+      "padding": 0,
+      "canonical_bson": "2000000005766563746F72000E000000092700000080FF000000000000807F00"
+    },
+    {
+      "description": "FLOAT32 with padding",
+      "valid": false,
+      "vector": [127.0, 7.0],
+      "dtype_hex": "0x27",
+      "dtype_alias": "FLOAT32",
+      "padding": 3
+    }
+  ]
+}
+
diff --git a/test/node/specs/bson-binary-vector/int8.json b/test/node/specs/bson-binary-vector/int8.json
new file mode 100644
index 00000000..7529721e
--- /dev/null
+++ b/test/node/specs/bson-binary-vector/int8.json
@@ -0,0 +1,57 @@
+{
+  "description": "Tests of Binary subtype 9, Vectors, with dtype INT8",
+  "test_key": "vector",
+  "tests": [
+    {
+      "description": "Simple Vector INT8",
+      "valid": true,
+      "vector": [127, 7],
+      "dtype_hex": "0x03",
+      "dtype_alias": "INT8",
+      "padding": 0,
+      "canonical_bson": "1600000005766563746F7200040000000903007F0700"
+    },
+    {
+      "description": "Empty Vector INT8",
+      "valid": true,
+      "vector": [],
+      "dtype_hex": "0x03",
+      "dtype_alias": "INT8",
+      "padding": 0,
+      "canonical_bson": "1400000005766563746F72000200000009030000"
+    },
+    {
+      "description": "Overflow Vector INT8",
+      "valid": false,
+      "vector": [128],
+      "dtype_hex": "0x03",
+      "dtype_alias": "INT8",
+      "padding": 0
+    },
+    {
+      "description": "Underflow Vector INT8",
+      "valid": false,
+      "vector": [-129],
+      "dtype_hex": "0x03",
+      "dtype_alias": "INT8",
+      "padding": 0
+    },
+    {
+      "description": "INT8 with padding",
+      "valid": false,
+      "vector": [127, 7],
+      "dtype_hex": "0x03",
+      "dtype_alias": "INT8",
+      "padding": 3
+    },
+    {
+      "description": "INT8 with float inputs",
+      "valid": false,
+      "vector": [127.77, 7.77],
+      "dtype_hex": "0x03",
+      "dtype_alias": "INT8",
+      "padding": 0
+    }
+  ]
+}
+
diff --git a/test/node/specs/bson-binary-vector/packed_bit.json b/test/node/specs/bson-binary-vector/packed_bit.json
new file mode 100644
index 00000000..035776e8
--- /dev/null
+++ b/test/node/specs/bson-binary-vector/packed_bit.json
@@ -0,0 +1,98 @@
+{
+  "description": "Tests of Binary subtype 9, Vectors, with dtype PACKED_BIT",
+  "test_key": "vector",
+  "tests": [
+    {
+      "description": "Padding specified with no vector data PACKED_BIT",
+      "valid": false,
+      "vector": [],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 1
+    },
+    {
+      "description": "Simple Vector PACKED_BIT",
+      "valid": true,
+      "vector": [127, 7],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 0,
+      "canonical_bson": "1600000005766563746F7200040000000910007F0700"
+    },
+    {
+      "description": "Empty Vector PACKED_BIT",
+      "valid": true,
+      "vector": [],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 0,
+      "canonical_bson": "1400000005766563746F72000200000009100000"
+    },
+    {
+      "description": "PACKED_BIT with padding",
+      "valid": true,
+      "vector": [127, 7],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 3,
+      "canonical_bson": "1600000005766563746F7200040000000910037F0700"
+    },
+    {
+      "description": "Overflow Vector PACKED_BIT",
+      "valid": false,
+      "vector": [256],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 0
+    },
+    {
+      "description": "Underflow Vector PACKED_BIT",
+      "valid": false,
+      "vector": [-1],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 0
+    },
+    {
+      "description": "Vector with float values PACKED_BIT",
+      "valid": false,
+      "vector": [127.5],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 0
+    },
+    {
+      "description": "Padding specified with no vector data PACKED_BIT",
+      "valid": false,
+      "vector": [],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 1
+    },
+    {
+      "description": "Exceeding maximum padding PACKED_BIT",
+      "valid": false,
+      "vector": [1],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 8
+    },
+    {
+      "description": "Negative padding PACKED_BIT",
+      "valid": false,
+      "vector": [1],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": -1
+    },
+    {
+      "description": "Vector with float values PACKED_BIT",
+      "valid": false,
+      "vector": [127.5],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 0
+    }
+  ]
+}
+
diff --git a/test/node/specs/bson-corpus/binary.json b/test/node/specs/bson-corpus/binary.json
index 20aaef74..0e0056f3 100644
--- a/test/node/specs/bson-corpus/binary.json
+++ b/test/node/specs/bson-corpus/binary.json
@@ -74,6 +74,36 @@
             "description": "$type query operator (conflicts with legacy $binary form with $type field)",
             "canonical_bson": "180000000378001000000010247479706500020000000000",
             "canonical_extjson": "{\"x\" : { \"$type\" : {\"$numberInt\": \"2\"}}}"
+        },
+        {
+            "description": "subtype 0x09 Vector FLOAT32",
+            "canonical_bson": "170000000578000A0000000927000000FE420000E04000",
+            "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"JwAAAP5CAADgQA==\", \"subType\": \"09\"}}}"
+        },
+        {
+            "description": "subtype 0x09 Vector INT8",
+            "canonical_bson": "11000000057800040000000903007F0700",
+            "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"AwB/Bw==\", \"subType\": \"09\"}}}"
+        },
+        {
+            "description": "subtype 0x09 Vector PACKED_BIT",
+            "canonical_bson": "11000000057800040000000910007F0700",
+            "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"EAB/Bw==\", \"subType\": \"09\"}}}"
+        },
+        {
+            "description": "subtype 0x09 Vector (Zero-length) FLOAT32",
+            "canonical_bson": "0F0000000578000200000009270000",
+            "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"JwA=\", \"subType\": \"09\"}}}"
+        },
+        {
+            "description": "subtype 0x09 Vector (Zero-length) INT8",
+            "canonical_bson": "0F0000000578000200000009030000",
+            "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"AwA=\", \"subType\": \"09\"}}}"
+        },
+        {
+            "description": "subtype 0x09 Vector (Zero-length) PACKED_BIT",
+            "canonical_bson": "0F0000000578000200000009100000",
+            "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"EAA=\", \"subType\": \"09\"}}}"
         }
     ],
     "decodeErrors": [