diff --git a/src/bson.ts b/src/bson.ts index f7e0fb56be..260b60aba1 100644 --- a/src/bson.ts +++ b/src/bson.ts @@ -132,3 +132,14 @@ export function resolveBSONOptions( options?.enableUtf8Validation ?? parentOptions?.enableUtf8Validation ?? true }; } + +/** @internal */ +export function parseUtf8ValidationOption(options?: { enableUtf8Validation?: boolean }): { + utf8: { writeErrors: false } | false; +} { + const enableUtf8Validation = options?.enableUtf8Validation; + if (enableUtf8Validation === false) { + return { utf8: false }; + } + return { utf8: { writeErrors: false } }; +} diff --git a/src/cmap/connection.ts b/src/cmap/connection.ts index e1fb3f9663..666e92fb8c 100644 --- a/src/cmap/connection.ts +++ b/src/cmap/connection.ts @@ -1,3 +1,4 @@ +import { type DeserializeOptions } from 'bson'; import { type Readable, Transform, type TransformCallback } from 'stream'; import { clearTimeout, setTimeout } from 'timers'; @@ -487,7 +488,7 @@ export class Connection extends TypedEventEmitter { // If `documentsReturnedIn` not set or raw is not enabled, use input bson options // Otherwise, support raw flag. Raw only works for cursors that hardcode firstBatch/nextBatch fields - const bsonOptions = + const bsonOptions: DeserializeOptions = options.documentsReturnedIn == null || !options.raw ? options : { diff --git a/src/cmap/wire_protocol/on_demand/document.ts b/src/cmap/wire_protocol/on_demand/document.ts index 67f5b3a091..efc7e32288 100644 --- a/src/cmap/wire_protocol/on_demand/document.ts +++ b/src/cmap/wire_protocol/on_demand/document.ts @@ -1,8 +1,9 @@ +import { type DeserializeOptions } from 'bson'; + import { Binary, type BSONElement, BSONError, - type BSONSerializeOptions, BSONType, deserialize, getBigInt64LE, @@ -10,7 +11,6 @@ import { getInt32LE, ObjectId, parseToElementsToArray, - pluckBSONSerializeOptions, Timestamp, toUTF8 } from '../../../bson'; @@ -44,6 +44,14 @@ export type JSTypeOf = { /** @internal */ type CachedBSONElement = { element: BSONElement; value: any | undefined }; +/** + * @internal + * + * Options for `OnDemandDocument.toObject()`. Validation is required to ensure + * that callers provide utf8 validation options. */ +export type OnDemandDocumentDeserializeOptions = Omit & + Required>; + /** @internal */ export class OnDemandDocument { /** @@ -330,24 +338,12 @@ export class OnDemandDocument { * Deserialize this object, DOES NOT cache result so avoid multiple invocations * @param options - BSON deserialization options */ - public toObject(options?: BSONSerializeOptions): Record { - const exactBSONOptions = { - ...pluckBSONSerializeOptions(options ?? {}), - validation: this.parseBsonSerializationOptions(options), + public toObject(options?: OnDemandDocumentDeserializeOptions): Record { + return deserialize(this.bson, { + ...options, index: this.offset, allowObjectSmallerThanBufferSize: true - }; - return deserialize(this.bson, exactBSONOptions); - } - - private parseBsonSerializationOptions(options?: { enableUtf8Validation?: boolean }): { - utf8: { writeErrors: false } | false; - } { - const enableUtf8Validation = options?.enableUtf8Validation; - if (enableUtf8Validation === false) { - return { utf8: false }; - } - return { utf8: { writeErrors: false } }; + }); } /** Returns this document's bytes only */ diff --git a/src/cmap/wire_protocol/responses.ts b/src/cmap/wire_protocol/responses.ts index 9837634bfb..e69cf84cfc 100644 --- a/src/cmap/wire_protocol/responses.ts +++ b/src/cmap/wire_protocol/responses.ts @@ -1,3 +1,5 @@ +import { type DeserializeOptions } from 'bson'; + import { type BSONElement, type BSONSerializeOptions, @@ -5,12 +7,18 @@ import { type Document, Long, parseToElementsToArray, + parseUtf8ValidationOption, + pluckBSONSerializeOptions, type Timestamp } from '../../bson'; import { MongoUnexpectedServerResponseError } from '../../error'; import { type ClusterTime } from '../../sdam/common'; import { decorateDecryptionResult, ns } from '../../utils'; -import { type JSTypeOf, OnDemandDocument } from './on_demand/document'; +import { + type JSTypeOf, + OnDemandDocument, + type OnDemandDocumentDeserializeOptions +} from './on_demand/document'; // eslint-disable-next-line no-restricted-syntax const enum BSONElementOffset { @@ -112,7 +120,8 @@ export class MongoDBResponse extends OnDemandDocument { this.get('recoveryToken', BSONType.object)?.toObject({ promoteValues: false, promoteLongs: false, - promoteBuffers: false + promoteBuffers: false, + validation: { utf8: true } }) ?? null ); } @@ -165,6 +174,14 @@ export class MongoDBResponse extends OnDemandDocument { } return this.clusterTime ?? null; } + + public override toObject(options?: BSONSerializeOptions): Record { + const exactBSONOptions = { + ...pluckBSONSerializeOptions(options ?? {}), + validation: parseUtf8ValidationOption(options) + }; + return super.toObject(exactBSONOptions); + } } /** @internal */ @@ -248,12 +265,13 @@ export class CursorResponse extends MongoDBResponse { this.cursor.get('postBatchResumeToken', BSONType.object)?.toObject({ promoteValues: false, promoteLongs: false, - promoteBuffers: false + promoteBuffers: false, + validation: { utf8: true } }) ?? null ); } - public shift(options?: BSONSerializeOptions): any { + public shift(options: OnDemandDocumentDeserializeOptions): any { if (this.iterated >= this.batchSize) { return null; } @@ -305,7 +323,7 @@ export class ExplainedCursorResponse extends CursorResponse { return this._length; } - override shift(options?: BSONSerializeOptions | undefined) { + override shift(options?: DeserializeOptions) { if (this._length === 0) return null; this._length -= 1; return this.toObject(options); diff --git a/src/cursor/abstract_cursor.ts b/src/cursor/abstract_cursor.ts index 4525ae19fe..274733e40a 100644 --- a/src/cursor/abstract_cursor.ts +++ b/src/cursor/abstract_cursor.ts @@ -1,6 +1,7 @@ import { Readable, Transform } from 'stream'; import { type BSONSerializeOptions, type Document, Long, pluckBSONSerializeOptions } from '../bson'; +import { type OnDemandDocumentDeserializeOptions } from '../cmap/wire_protocol/on_demand/document'; import { type CursorResponse } from '../cmap/wire_protocol/responses'; import { MongoAPIError, @@ -157,6 +158,9 @@ export abstract class AbstractCursor< /** @event */ static readonly CLOSE = 'close' as const; + /** @internal */ + protected deserializationOptions: OnDemandDocumentDeserializeOptions; + /** @internal */ protected constructor( client: MongoClient, @@ -211,6 +215,13 @@ export abstract class AbstractCursor< } else { this.cursorSession = this.cursorClient.startSession({ owner: this, explicit: false }); } + + this.deserializationOptions = { + ...this.cursorOptions, + validation: { + utf8: options?.enableUtf8Validation === false ? false : true + } + }; } /** @@ -304,7 +315,7 @@ export abstract class AbstractCursor< ); for (let count = 0; count < documentsToRead; count++) { - const document = this.documents?.shift(this.cursorOptions); + const document = this.documents?.shift(this.deserializationOptions); if (document != null) { bufferedDocs.push(document); } @@ -406,7 +417,7 @@ export abstract class AbstractCursor< } do { - const doc = this.documents?.shift(this.cursorOptions); + const doc = this.documents?.shift(this.deserializationOptions); if (doc != null) { if (this.transform != null) return await this.transformDocument(doc); return doc; @@ -425,7 +436,7 @@ export abstract class AbstractCursor< throw new MongoCursorExhaustedError(); } - let doc = this.documents?.shift(this.cursorOptions); + let doc = this.documents?.shift(this.deserializationOptions); if (doc != null) { if (this.transform != null) return await this.transformDocument(doc); return doc; @@ -433,7 +444,7 @@ export abstract class AbstractCursor< await this.fetchBatch(); - doc = this.documents?.shift(this.cursorOptions); + doc = this.documents?.shift(this.deserializationOptions); if (doc != null) { if (this.transform != null) return await this.transformDocument(doc); return doc; diff --git a/src/cursor/aggregation_cursor.ts b/src/cursor/aggregation_cursor.ts index c843f6c47e..622bce14aa 100644 --- a/src/cursor/aggregation_cursor.ts +++ b/src/cursor/aggregation_cursor.ts @@ -76,7 +76,7 @@ export class AggregationCursor extends AbstractCursor { explain: verbosity ?? true }) ) - ).shift(this.aggregateOptions); + ).shift(this.deserializationOptions); } /** Add a stage to the aggregation pipeline diff --git a/src/cursor/find_cursor.ts b/src/cursor/find_cursor.ts index e4b3dbc03c..ef21cea290 100644 --- a/src/cursor/find_cursor.ts +++ b/src/cursor/find_cursor.ts @@ -143,7 +143,7 @@ export class FindCursor extends AbstractCursor { explain: verbosity ?? true }) ) - ).shift(this.findOptions); + ).shift(this.deserializationOptions); } /** Set the cursor query */ diff --git a/src/index.ts b/src/index.ts index 19b2359379..eeebbd1154 100644 --- a/src/index.ts +++ b/src/index.ts @@ -302,7 +302,11 @@ export type { ClientMetadata, ClientMetadataOptions } from './cmap/handshake/cli export type { ConnectionPoolMetrics } from './cmap/metrics'; export type { StreamDescription, StreamDescriptionOptions } from './cmap/stream_description'; export type { CompressorName } from './cmap/wire_protocol/compression'; -export type { JSTypeOf, OnDemandDocument } from './cmap/wire_protocol/on_demand/document'; +export type { + JSTypeOf, + OnDemandDocument, + OnDemandDocumentDeserializeOptions +} from './cmap/wire_protocol/on_demand/document'; export type { CursorResponse, MongoDBResponse, diff --git a/test/integration/node-specific/bson-options/utf8_validation.test.ts b/test/integration/node-specific/bson-options/utf8_validation.test.ts index d6345a884d..85f128b533 100644 --- a/test/integration/node-specific/bson-options/utf8_validation.test.ts +++ b/test/integration/node-specific/bson-options/utf8_validation.test.ts @@ -1,165 +1,82 @@ import { expect } from 'chai'; import * as net from 'net'; import * as sinon from 'sinon'; -import { inspect } from 'util'; import { BSON, BSONError, type Collection, - deserialize, type MongoClient, MongoServerError, - OnDemandDocument, OpMsgResponse } from '../../../mongodb'; -const EXPECTED_VALIDATION_DISABLED_ARGUMENT = { - utf8: false -}; - -const EXPECTED_VALIDATION_ENABLED_ARGUMENT = { - utf8: { - writeErrors: false - } -}; - describe('class MongoDBResponse', () => { - let bsonSpy: sinon.SinonSpy; - - beforeEach(() => { - // @ts-expect-error private function - bsonSpy = sinon.spy(OnDemandDocument.prototype, 'parseBsonSerializationOptions'); - }); - - afterEach(() => { - bsonSpy?.restore(); - bsonSpy = null; - }); - let client; afterEach(async () => { + sinon.restore(); if (client) await client.close(); }); - describe('enableUtf8Validation option set to false', () => { - const option = { enableUtf8Validation: false }; - - for (const passOptionTo of ['client', 'db', 'collection', 'operation']) { - it(`should disable validation with option passed to ${passOptionTo}`, async function () { - client = this.configuration.newClient(passOptionTo === 'client' ? option : undefined); - - const db = client.db('bson_utf8Validation_db', passOptionTo === 'db' ? option : undefined); - const collection = db.collection( - 'bson_utf8Validation_coll', - passOptionTo === 'collection' ? option : undefined - ); - - await collection.insertOne( - { name: 'John Doe' }, - passOptionTo === 'operation' ? option : {} - ); - - expect(bsonSpy).to.have.been.called; - const result = bsonSpy.lastCall.returnValue; - expect(result).to.deep.equal(EXPECTED_VALIDATION_DISABLED_ARGUMENT); - }); - } - }); - - describe('enableUtf8Validation option set to true', () => { - // define client and option for tests to use - const option = { enableUtf8Validation: true }; - for (const passOptionTo of ['client', 'db', 'collection', 'operation']) { - it(`should enable validation with option passed to ${passOptionTo}`, async function () { - client = this.configuration.newClient(passOptionTo === 'client' ? option : undefined); - await client.connect(); - - const db = client.db('bson_utf8Validation_db', passOptionTo === 'db' ? option : undefined); - const collection = db.collection( - 'bson_utf8Validation_coll', - passOptionTo === 'collection' ? option : undefined - ); - - await collection.insertOne( - { name: 'John Doe' }, - passOptionTo === 'operation' ? option : {} - ); - - expect(bsonSpy).to.have.been.called; - const result = bsonSpy.lastCall.returnValue; - expect(result).to.deep.equal(EXPECTED_VALIDATION_ENABLED_ARGUMENT); - }); - } - }); - - describe('enableUtf8Validation option not set', () => { - const option = {}; - for (const passOptionTo of ['client', 'db', 'collection', 'operation']) { - it(`should default to enabled with option passed to ${passOptionTo}`, async function () { - client = this.configuration.newClient(passOptionTo === 'client' ? option : undefined); - await client.connect(); - - const db = client.db('bson_utf8Validation_db', passOptionTo === 'db' ? option : undefined); - const collection = db.collection( - 'bson_utf8Validation_coll', - passOptionTo === 'collection' ? option : undefined - ); - - await collection.insertOne( - { name: 'John Doe' }, - passOptionTo === 'operation' ? option : {} - ); - - expect(bsonSpy).to.have.been.called; - const result = bsonSpy.lastCall.returnValue; - expect(result).to.deep.equal(EXPECTED_VALIDATION_ENABLED_ARGUMENT); - }); - } - }); - context( - 'when the server is given a long multibyte utf sequence and there is a writeError', + 'when the server is given a long multibyte utf sequence and there is a writeError that includes invalid utf8', () => { let client: MongoClient; - beforeEach(async function () { - client = this.configuration.newClient(); - }); - - afterEach(async function () { - sinon.restore(); - await client.db('parsing').dropDatabase(); - await client.close(); - }); - - it('does not throw a UTF-8 parsing error', async () => { - // Insert a large string of multibyte UTF-8 characters - const _id = '\u{1F92A}'.repeat(100); - - const test = client.db('parsing').collection<{ _id: string }>('parsing'); - await test.insertOne({ _id }); - - const spy = sinon.spy(OpMsgResponse.prototype, 'parse'); - - const error = await test.insertOne({ _id }).catch(error => error); - - // Check that the server sent us broken BSON (bad UTF) - expect(() => { - BSON.deserialize(spy.returnValues[0], { validation: { utf8: true } }); - }).to.throw(BSON.BSONError, /Invalid UTF/i); - - // Assert the driver squashed it - expect(error).to.be.instanceOf(MongoServerError); - expect(error.message).to.match(/duplicate/i); - expect(error.message).to.not.match(/utf/i); - expect(error.errmsg).to.include('\uFFFD'); - }); + let error: MongoServerError; + for (const { optionDescription, options } of [ + { optionDescription: 'explicitly enabled', options: { enableUtf8Validation: true } }, + { optionDescription: 'explicitly disabled', options: { enableUtf8Validation: false } }, + { optionDescription: 'omitted', options: {} } + ]) { + context('when utf8 validation is ' + optionDescription, function () { + beforeEach(async function () { + client = this.configuration.newClient(); + + async function generateWriteErrorWithInvalidUtf8() { + // Insert a large string of multibyte UTF-8 characters + const _id = '\u{1F92A}'.repeat(100); + + const test = client.db('parsing').collection<{ _id: string }>('parsing'); + await test.insertOne({ _id }, options); + + const spy = sinon.spy(OpMsgResponse.prototype, 'parse'); + + error = await test.insertOne({ _id }).catch(error => error); + + // Check that the server sent us broken BSON (bad UTF) + expect(() => { + BSON.deserialize(spy.returnValues[0], { validation: { utf8: true } }); + }).to.throw( + BSON.BSONError, + /Invalid UTF/i, + 'did not generate error with invalid utf8' + ); + } + + await generateWriteErrorWithInvalidUtf8(); + }); + + afterEach(async function () { + sinon.restore(); + await client.db('parsing').dropDatabase(); + await client.close(); + }); + + it('does not throw a UTF-8 parsing error', function () { + // Assert the driver squashed it + expect(error).to.be.instanceOf(MongoServerError); + expect(error.message).to.match(/duplicate/i); + expect(error.message).to.not.match(/utf/i); + expect(error.errmsg).to.include('\uFFFD'); + }); + }); + } } ); }); -describe('utf8 validation with cursors', function () { +describe('parsing of utf8-invalid documents with cursors', function () { let client: MongoClient; let collection: Collection; @@ -176,7 +93,9 @@ describe('utf8 validation with cursors', function () { if (providedBuffer.includes(targetBytes)) { if (providedBuffer.split(targetBytes).length !== 2) { sinon.restore(); - const message = `too many target bytes sequences: received ${providedBuffer.split(targetBytes).length}\n. command: ${inspect(deserialize(args[0]), { depth: Infinity })}`; + const message = `too many target bytes sequences: received ${ + providedBuffer.split(targetBytes).length + }`; throw new Error(message); } const buffer = Buffer.from(providedBuffer.replace(targetBytes, 'c301'.repeat(8)), 'hex'); @@ -254,7 +173,7 @@ describe('utf8 validation with cursors', function () { } context('when utf-8 validation is explicitly enabled', function () { - it('a for-await loop throw a BSON error', async function () { + it('a for-await loop throws a BSON error', async function () { await expectReject(async () => { for await (const _doc of collection.find({}, { enableUtf8Validation: true })); }); @@ -295,7 +214,7 @@ describe('utf8 validation with cursors', function () { }); context('utf-8 validation defaults to enabled', function () { - it('a for-await loop throw a BSON error', async function () { + it('a for-await loop throws a BSON error', async function () { await expectReject(async () => { for await (const _doc of collection.find({})); }); diff --git a/test/unit/cmap/wire_protocol/responses.test.ts b/test/unit/cmap/wire_protocol/responses.test.ts index 7fccbfc7fc..9f2527d8a3 100644 --- a/test/unit/cmap/wire_protocol/responses.test.ts +++ b/test/unit/cmap/wire_protocol/responses.test.ts @@ -1,9 +1,5 @@ import { expect } from 'chai'; -import * as sinon from 'sinon'; -// to spy on the bson module, we must import it from the driver -// eslint-disable-next-line @typescript-eslint/no-restricted-imports -import * as mdb from '../../../../src/bson'; import { CursorResponse, Int32, @@ -17,74 +13,6 @@ describe('class MongoDBResponse', () => { it('is a subclass of OnDemandDocument', () => { expect(new MongoDBResponse(serialize({ ok: 1 }))).to.be.instanceOf(OnDemandDocument); }); - - context('utf8 validation', () => { - let deseriailzeSpy: sinon.SinonStub>; - beforeEach(function () { - const deserialize = mdb.deserialize; - deseriailzeSpy = sinon.stub>().callsFake(deserialize); - sinon.stub(mdb, 'deserialize').get(() => { - return deseriailzeSpy; - }); - }); - afterEach(function () { - sinon.restore(); - }); - - context('when enableUtf8Validation is not specified', () => { - const options = { enableUtf8Validation: undefined }; - it('calls BSON deserialize with writeErrors validation turned off', () => { - const res = new MongoDBResponse(serialize({})); - res.toObject(options); - - expect(deseriailzeSpy).to.have.been.called; - - const [ - { - args: [_buffer, { validation }] - } - ] = deseriailzeSpy.getCalls(); - - expect(validation).to.deep.equal({ utf8: { writeErrors: false } }); - }); - }); - - context('when enableUtf8Validation is true', () => { - const options = { enableUtf8Validation: true }; - it('calls BSON deserialize with writeErrors validation turned off', () => { - const res = new MongoDBResponse(serialize({})); - res.toObject(options); - - expect(deseriailzeSpy).to.have.been.called; - - const [ - { - args: [_buffer, { validation }] - } - ] = deseriailzeSpy.getCalls(); - - expect(validation).to.deep.equal({ utf8: { writeErrors: false } }); - }); - }); - - context('when enableUtf8Validation is false', () => { - const options = { enableUtf8Validation: false }; - it('calls BSON deserialize with all validation disabled', () => { - const res = new MongoDBResponse(serialize({})); - res.toObject(options); - - expect(deseriailzeSpy).to.have.been.called; - - const [ - { - args: [_buffer, { validation }] - } - ] = deseriailzeSpy.getCalls(); - - expect(validation).to.deep.equal({ utf8: false }); - }); - }); - }); }); describe('class CursorResponse', () => {