Skip to content

Commit

Permalink
test(NODE-6534): add spec test runner for Binary vector
Browse files Browse the repository at this point in the history
  • Loading branch information
nbbeeken committed Nov 14, 2024
1 parent f99fdfd commit 6f4acc4
Show file tree
Hide file tree
Showing 5 changed files with 430 additions and 0 deletions.
194 changes: 194 additions & 0 deletions test/node/bson_binary_vector.spec.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
import * as fs from 'fs';
import * as path from 'path';
import { BSON, BSONError, Binary } from '../register-bson';
import { expect } from 'chai';

const { toHex, fromHex } = BSON.onDemand.ByteUtils;

// Endianness probe: store -1 as a float64 and look at how its bytes are laid out in memory.
// Little endian [0, 0, 0, 0, 0, 0, 240, 191]
// Big endian [191, 240, 0, 0, 0, 0, 0, 0]
const FLOAT = Float64Array.of(-1);
const FLOAT_BYTES = new Uint8Array(FLOAT.buffer, 0, FLOAT.BYTES_PER_ELEMENT);

// The last byte is zero only when the sign/exponent bytes are stored first (big endian).
const isBigEndian = FLOAT_BYTES[7] === 0;

/** Element type names that VECTOR_TO_BINARY switches on when building a vector payload. */
type DTypeAlias = 'INT8' | 'FLOAT32' | 'PACKED_BIT';
/** One test case loaded from a JSON file in `specs/bson-binary-vector`. */
type VectorTest = {
/** Used to build the mocha test title. */
description: string;
/** Vector elements; the strings 'inf' and '-inf' are mapped to +/-Infinity by fixFloats. */
vector: (number | string)[];
/** Selects whether the case runs in the 'valid' or the 'invalid' group. */
valid: boolean;
/** Numeric string (e.g. "0x27") coerced with unary `+` and written as the first payload byte. */
dtype_hex: string;
dtype_alias: DTypeAlias;
/** Written as the second payload byte. */
padding: number;
/** Hex of the expected serialized document; the invalid cases in the bundled fixtures omit it. */
canonical_bson?: string;
};
/** A whole fixture file: `test_key` is the document key the Binary is serialized under. */
type VectorSuite = { description: string; test_key: string; tests: VectorTest[] };

/**
 * Converts a FLOAT32 fixture element to a JS number.
 *
 * JSON cannot represent infinities, so the fixtures spell them as the strings
 * 'inf' and '-inf'; plain numbers are returned untouched.
 *
 * @param f - a fixture element, either a number or an infinity marker string
 * @returns the numeric value to store in the vector
 * @throws Error when `f` is a string other than 'inf' or '-inf'
 */
function fixFloats(f: string | number): number {
  switch (f) {
    case 'inf':
      return Infinity;
    case '-inf':
      return -Infinity;
    default:
      if (typeof f === 'number') {
        return f;
      }
      throw new Error(`unknown float value: ${f}`);
  }
}

/**
 * Validates a single INT8 fixture element.
 *
 * The value must be an integer in [-128, 127]. Non-integers (including NaN) are
 * rejected explicitly: otherwise the Int8Array built by the caller would silently
 * truncate them, and a vector such as [7.77] would encode without raising.
 *
 * @param f - a fixture element; only numbers are expected for INT8 vectors
 * @returns `f` unchanged when it is a valid int8
 * @throws Error when `f` is not a number (malformed fixture)
 * @throws BSONError when `f` is not an integer or is outside [-128, 127]
 */
function fixInt8s(f: number | string): number {
  if (typeof f !== 'number') throw new Error('unexpected test data');

  // TODO(NODE-6537): these checks must be a part of the final "make a binary from" API.
  if (!Number.isInteger(f)) {
    throw new BSONError(`int8 must be an integer: ${f}`);
  }
  if (f < -128 || f > 127) {
    throw new BSONError(`int8 out of range: ${f}`);
  }
  return f;
}

/**
 * Validates a single PACKED_BIT fixture element, i.e. one byte of packed bits.
 *
 * @param f - a fixture element; only numbers are expected for PACKED_BIT vectors
 * @returns `f` unchanged when it is an integer in [0, 255]
 * @throws Error when `f` is not a number (malformed fixture)
 * @throws BSONError when `f` is not an integer in [0, 255]
 */
function fixBits(f: number | string): number {
  if (typeof f !== 'number') throw new Error('unexpected test data');

  const fitsInByte = Number.isSafeInteger(f) && f >= 0 && f <= 255;
  if (fitsInByte) {
    return f;
  }
  // TODO(NODE-6537): this must be a part of the final "make a binary from" API.
  throw new BSONError(`bit out of range: ${f}`);
}

/**
 * TODO(NODE-6537): Replace the following with final "make a binary from" API
 *
 * Builds a subtype 9 Binary from fixture data. The payload is laid out as
 * `[dtype byte, padding byte, ...element bytes]`; FLOAT32 elements are always
 * written little endian, regardless of the host byte order.
 *
 * @param vector - fixture elements, validated per dtype by fixInt8s / fixBits / fixFloats
 * @param dtype_hex - numeric string (e.g. "0x27") stored as the first byte
 * @param dtype_alias - selects how the elements are validated and encoded
 * @param padding - stored as the second byte; not validated here
 * @returns a Binary with sub_type 9 wrapping the assembled bytes
 * @throws whatever the per-element validators throw, or Error for an unknown dtype_alias
 */
function VECTOR_TO_BINARY(
  vector: (number | string)[],
  dtype_hex: string,
  dtype_alias: DTypeAlias,
  padding: number
): Binary {
  let payload: Uint8Array;

  if (dtype_alias === 'PACKED_BIT' || dtype_alias === 'INT8') {
    const validate = dtype_alias === 'INT8' ? fixInt8s : fixBits;
    // Int8Array wraps 128..255 into negatives, but the underlying bytes are what get copied out.
    const elements = new Int8Array(vector.map(validate));
    payload = new Uint8Array(elements.buffer);
  } else if (dtype_alias === 'FLOAT32') {
    const elements = new Float32Array(vector.map(fixFloats));
    payload = new Uint8Array(elements.buffer);
    if (isBigEndian) {
      // Flip each 4-byte float in place so the output is little endian.
      for (let offset = 0; offset < payload.byteLength; offset += 4) {
        payload.subarray(offset, offset + 4).reverse();
      }
    }
  } else {
    throw new Error(`Unknown dtype_alias: ${dtype_alias}`);
  }

  const bytes = new Uint8Array(payload.byteLength + 2);
  bytes[0] = +dtype_hex;
  bytes[1] = padding;
  bytes.set(payload, 2);
  return new Binary(bytes, 9);
}

// Runs every JSON fixture in specs/bson-binary-vector as a mocha suite: valid cases are
// encoded and decoded against canonical_bson, invalid cases must raise a BSONError.
describe('BSON Binary Vector spec tests', () => {
  const specDirectory = path.join(__dirname, 'specs/bson-binary-vector');

  // Keyed by the file name up to its first '.'.
  const suitesByName: Record<string, VectorSuite> = Object.create(null);
  for (const fileName of fs.readdirSync(specDirectory)) {
    const contents = fs.readFileSync(path.join(specDirectory, fileName), 'utf8');
    suitesByName[fileName.split('.')[0]] = JSON.parse(contents);
  }

  for (const [suiteName, suite] of Object.entries(suitesByName)) {
    describe(suiteName, function () {
      const validTests: VectorTest[] = [];
      const invalidTests: VectorTest[] = [];
      for (const candidate of suite.tests) {
        (candidate.valid ? validTests : invalidTests).push(candidate);
      }

      describe('valid', function () {
        /**
         * 1. encode a document from the numeric values, dtype, and padding, along with the "test_key", and assert this matches the canonical_bson string.
         * 2. decode the canonical_bson into its binary form, and then assert that the numeric values, dtype, and padding all match those provided in the JSON.
         *
         * > Note: For floating point number types, exact numerical matches may not be possible.
         * > Drivers that natively support the floating-point type being tested (e.g., when testing float32 vector values in a driver that natively supports float32),
         * > MUST assert that the input float array is the same after encoding and decoding.
         */
        for (const testCase of validTests) {
          const { description, vector, dtype_hex, dtype_alias, padding } = testCase;

          it(`encode ${description}`, function () {
            const binary = VECTOR_TO_BINARY(vector, dtype_hex, dtype_alias, padding);
            const serialized = BSON.serialize({ [suite.test_key]: binary });

            expect(toHex(serialized)).to.equal(testCase.canonical_bson!.toLowerCase());
          });

          it(`decode ${description}`, function () {
            const decoded = BSON.deserialize(fromHex(testCase.canonical_bson!.toLowerCase()));

            expect(decoded[suite.test_key].sub_type).to.equal(0x09);
            expect(decoded[suite.test_key].buffer[0]).to.equal(+dtype_hex);
            expect(decoded[suite.test_key].buffer[1]).to.equal(padding);
          });
        }
      });

      describe('invalid', function () {
        /**
         * To prove correct in an invalid case (valid:false),
         * one MUST raise an exception when attempting to encode
         * a document from the numeric values, dtype, and padding.
         */
        for (const testCase of invalidTests) {
          it(testCase.description, function () {
            const encodeAndValidate = (): void => {
              // Errors are thrown when creating the binary because of invalid values in the vector.
              const binary = VECTOR_TO_BINARY(
                testCase.vector,
                testCase.dtype_hex,
                testCase.dtype_alias,
                testCase.padding
              );

              // vector assertions TODO(NODE-6537): Replace the following with final "make a binary from" API.
              if (binary.sub_type !== 0x09) {
                return;
              }

              // dtype byte values as they appear in the first payload byte.
              const FLOAT32_DTYPE = 0x27;
              const INT8_DTYPE = 0x03;
              const PACKED_BIT_DTYPE = 0x10;

              const byteLength = binary.position;
              const bytes = binary.buffer;
              const dType = bytes[0] ?? 0;
              const paddingByte = bytes[1] ?? 0;

              if (paddingByte !== 0 && (dType === FLOAT32_DTYPE || dType === INT8_DTYPE)) {
                throw new BSONError('padding must be zero for int8 and float32 vectors');
              }

              if (dType === PACKED_BIT_DTYPE) {
                // A 2-byte payload holds only the dtype and padding bytes, i.e. no elements.
                if (paddingByte !== 0 && byteLength === 2) {
                  throw new BSONError('padding must be zero for packed bit vectors that are empty');
                }

                if (paddingByte > 7) {
                  throw new BSONError(
                    `padding must be a value between 0 and 7. found: ${bytes[1]}`
                  );
                }
              }
            };

            expect(encodeAndValidate).to.throw(BSONError);
          });
        }
      });
    });
  }
});
51 changes: 51 additions & 0 deletions test/node/specs/bson-binary-vector/float32.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"description": "Tests of Binary subtype 9, Vectors, with dtype FLOAT32",
"test_key": "vector",
"tests": [
{
"description": "Simple Vector FLOAT32",
"valid": true,
"vector": [127.0, 7.0],
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"padding": 0,
"canonical_bson": "1C00000005766563746F72000A0000000927000000FE420000E04000"
},
{
"description": "Vector with decimals and negative value FLOAT32",
"valid": true,
"vector": [127.7, -7.7],
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"padding": 0,
"canonical_bson": "1C00000005766563746F72000A0000000927006666FF426666F6C000"
},
{
"description": "Empty Vector FLOAT32",
"valid": true,
"vector": [],
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"padding": 0,
"canonical_bson": "1400000005766563746F72000200000009270000"
},
{
"description": "Infinity Vector FLOAT32",
"valid": true,
"vector": ["-inf", 0.0, "inf"],
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"padding": 0,
"canonical_bson": "2000000005766563746F72000E000000092700000080FF000000000000807F00"
},
{
"description": "FLOAT32 with padding",
"valid": false,
"vector": [127.0, 7.0],
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"padding": 3
}
]
}

57 changes: 57 additions & 0 deletions test/node/specs/bson-binary-vector/int8.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{
"description": "Tests of Binary subtype 9, Vectors, with dtype INT8",
"test_key": "vector",
"tests": [
{
"description": "Simple Vector INT8",
"valid": true,
"vector": [127, 7],
"dtype_hex": "0x03",
"dtype_alias": "INT8",
"padding": 0,
"canonical_bson": "1600000005766563746F7200040000000903007F0700"
},
{
"description": "Empty Vector INT8",
"valid": true,
"vector": [],
"dtype_hex": "0x03",
"dtype_alias": "INT8",
"padding": 0,
"canonical_bson": "1400000005766563746F72000200000009030000"
},
{
"description": "Overflow Vector INT8",
"valid": false,
"vector": [128],
"dtype_hex": "0x03",
"dtype_alias": "INT8",
"padding": 0
},
{
"description": "Underflow Vector INT8",
"valid": false,
"vector": [-129],
"dtype_hex": "0x03",
"dtype_alias": "INT8",
"padding": 0
},
{
"description": "INT8 with padding",
"valid": false,
"vector": [127, 7],
"dtype_hex": "0x03",
"dtype_alias": "INT8",
"padding": 3
},
{
"description": "INT8 with float inputs",
"valid": false,
"vector": [127.77, 7.77],
"dtype_hex": "0x03",
"dtype_alias": "INT8",
"padding": 0
}
]
}

Loading

0 comments on commit 6f4acc4

Please sign in to comment.