From 4f8ddda45c7ea26956dcdb8d5c42897dac70600f Mon Sep 17 00:00:00 2001 From: Nathan Disidore Date: Wed, 24 Jul 2024 14:19:10 -0500 Subject: [PATCH] Update Vectorize types for GA release (#2421) * Update Vectorize types for GA release * Update resources in Dev Container --- .devcontainer/Dockerfile | 8 +- .../test/vectorize/vectorize-api-test.js | 15 ++-- .../internal/test/vectorize/vectorize-mock.js | 40 ++++----- src/cloudflare/internal/vectorize.d.ts | 89 ++++++++++++++++--- src/cloudflare/vectorize.ts | 31 +++++-- types/defines/vectorize.d.ts | 88 +++++++++++++++--- 6 files changed, 212 insertions(+), 59 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 8776f185f28..172c3d8d9dd 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -18,15 +18,15 @@ ENV NODE_PATH ${NODE_HOME}/lib/node_modules ENV PATH ${NODE_HOME}/bin:$PATH # Install Bazel (via Bazelisk) -ARG BAZELISK_VERSION=v1.19.0 -ARG BAZELISK_DOWNLOAD_SHA="d28b588ac0916abd6bf02defb5433f6eddf7cba35ffa808eabb65a44aab226f7" +ARG BAZELISK_VERSION=v1.20.0 +ARG BAZELISK_DOWNLOAD_SHA="d9af1fa808c0529753c3befda75123236a711d971d3485a390507122148773a3" RUN curl -fSsL -o /usr/local/bin/bazel https://github.com/bazelbuild/bazelisk/releases/download/${BAZELISK_VERSION}/bazelisk-linux-amd64 \ && echo "${BAZELISK_DOWNLOAD_SHA} /usr/local/bin/bazel" | sha256sum --check \ && chmod 0755 /usr/local/bin/bazel # Install Bazel Buildifer -ARG BUILDIFER_VERSION=v6.4.0 -ARG BUILDIFER_DOWNLOAD_SHA="be63db12899f48600bad94051123b1fd7b5251e7661b9168582ce52396132e92" +ARG BUILDIFER_VERSION=v7.1.2 +ARG BUILDIFER_DOWNLOAD_SHA="28285fe7e39ed23dc1a3a525dfcdccbc96c0034ff1d4277905d2672a71b38f13" RUN curl -fSsL -o /usr/local/bin/buildifier https://github.com/bazelbuild/buildtools/releases/download/${BUILDIFER_VERSION}/buildifier-linux-amd64 \ && echo "${BUILDIFER_DOWNLOAD_SHA} /usr/local/bin/buildifier" | sha256sum --check \ && chmod 0755 /usr/local/bin/buildifier diff --git a/src/cloudflare/internal/test/vectorize/vectorize-api-test.js b/src/cloudflare/internal/test/vectorize/vectorize-api-test.js index 4a3cf5f75b0..d7917f20585 100644 --- a/src/cloudflare/internal/test/vectorize/vectorize-api-test.js +++ b/src/cloudflare/internal/test/vectorize/vectorize-api-test.js @@ -7,7 +7,7 @@ import * as assert from "node:assert"; import { KnownModel, DistanceMetric } from "cloudflare:vectorize"; /** - * @typedef {{'vector-search': VectorizeIndex}} Env + * @typedef {{'vector-search': Vectorize}} Env * */ @@ -23,10 +23,10 @@ export const test_vector_search_vector_query = { const results = await IDX.query(new Float32Array(new Array(5).fill(0)), { topK: 3, returnValues: true, - returnMetadata: true, + returnMetadata: "indexed", }); assert.equal(true, results.count > 0); - /** @type {VectorizeMatches} */ + /** @type {VectorizeQueryMatches} */ const expected = { matches: [ { @@ -130,7 +130,7 @@ export const test_vector_search_vector_insert = { }, ]; const results = await IDX.insert(newVectors); - assert.equal(results.count, 5); + assert.equal(results.mutationId, `total vectors: 5`); } }, }; @@ -189,7 +189,7 @@ export const test_vector_search_vector_upsert = { }, ]; const results = await IDX.upsert(newVectors); - assert.equal(results.count, 4); + assert.equal(results.mutationId, `total vectors: 4`); } }, }; @@ -207,10 +207,7 @@ export const test_vector_search_vector_delete_ids = { "vector-b", "vector-c", ]); - assert.deepStrictEqual(results, { - ids: ["vector-a", "vector-b", "vector-c"], - count: 3, - }); + assert.equal(results.mutationId, `deleted vectors: 3`); } }, }; diff --git a/src/cloudflare/internal/test/vectorize/vectorize-mock.js b/src/cloudflare/internal/test/vectorize/vectorize-mock.js index a1f3cfa028c..04fc1c0c7ab 100644 --- a/src/cloudflare/internal/test/vectorize/vectorize-mock.js +++ b/src/cloudflare/internal/test/vectorize/vectorize-mock.js @@ -2,7 +2,7 @@ // Licensed under the Apache 2.0 license found in the LICENSE file or at: // https://opensource.org/licenses/Apache-2.0 -/** @type {Array} */ +/** @type {Array} */ const exampleVectorMatches = [ { id: "b0daca4a-ffd8-4865-926b-e24800af2a2d", @@ -130,7 +130,7 @@ export default { } else if (request.method === "POST" && pathname.endsWith("/insert")) { /** @type {{vectors: Array}} */ const data = await request.json(); - if (data.vectors.find((v) => v.id == "fail-with-test-error")) { + if (data.vectors.find((v) => v.id === "fail-with-test-error")) { return Response.json( { code: 9999, @@ -142,34 +142,34 @@ export default { ); } - return Response.json({ - ids: [ - ...data.vectors.map(({ id }) => id), - ...exampleVectors.map(({ id }) => id), - ], - count: data.vectors.length + exampleVectors.length, - }); + /** @type {VectorizeAsyncMutation} */ + const res = { + // fudge a bit and set the mutation id to some internals so our asserts can check more + mutationId: `total vectors: ${data.vectors.length + exampleVectors.length}`, + }; + return Response.json(res); } else if (request.method === "POST" && pathname.endsWith("/upsert")) { /** @type {{vectors: Array}} */ let data = await request.json(); if (data.vectors.length > 1) data.vectors.splice(-1); - return Response.json({ - ids: [ - ...data.vectors.map(({ id }) => id), - ...exampleVectors.map(({ id }) => id), - ], - count: data.vectors.length + exampleVectors.length, - }); + /** @type {VectorizeAsyncMutation} */ + const res = { + // fudge a bit and set the mutation id to some internals so our asserts can check more + mutationId: `total vectors: ${data.vectors.length + exampleVectors.length}`, + }; + return Response.json(res); } else if ( request.method === "POST" && pathname.endsWith("/deleteByIds") ) { /** @type {{ids: Array}} */ const body = await request.json(); - return Response.json({ - ids: body.ids, - count: body.ids.length, - }); + /** @type {VectorizeAsyncMutation} */ + const res = { + // fudge a bit and set the mutation id to some internals so our asserts can check more + mutationId: `deleted vectors: ${body.ids.length}`, + }; + return Response.json(res); } else if (request.method === "POST" && pathname.endsWith("/getByIds")) { /** @type {{ids: Array}} */ const body = await request.json(); diff --git a/src/cloudflare/internal/vectorize.d.ts b/src/cloudflare/internal/vectorize.d.ts index da6c17453d0..c9f6361ec6a 100644 --- a/src/cloudflare/internal/vectorize.d.ts +++ b/src/cloudflare/internal/vectorize.d.ts @@ -56,11 +56,24 @@ type VectorizeVectorMetadataFilter = { */ type VectorizeDistanceMetric = "euclidean" | "cosine" | "dot-product"; -interface VectorizeQueryOptions { +/** + * Metadata return levels for a Vectorize query. + * + * Default to "none". + * + * @property all Full metadata for the vector return set, including all fields (including those un-indexed) without truncation. This is a more expensive retrieval, as it requires additional fetching & reading of un-indexed data. + * @property indexed Return all metadata fields configured for indexing in the vector return set. This level of retrieval is "free" in that no additional overhead is incurred returning this data. However, note that indexed metadata is subject to truncation (especially for larger strings). + * @property none No indexed metadata will be returned. + */ +type VectorizeMetadataRetrievalLevel = "all" | "indexed" | "none"; + +interface VectorizeQueryOptions< + MetadataReturn extends boolean | VectorizeMetadataRetrievalLevel = boolean, +> { topK?: number; namespace?: string; returnValues?: boolean; - returnMetadata?: boolean; + returnMetadata?: MetadataReturn; filter?: VectorizeVectorMetadataFilter; } @@ -102,7 +115,7 @@ interface VectorizeVector { values: VectorFloatArray | number[]; /** The namespace this vector belongs to. */ namespace?: string; - /** Metadata associated with the vector. Includes the values of the other fields and potentially additional details. */ + /** Metadata associated with the vector. Includes the values of other fields and potentially additional details. */ metadata?: Record; } @@ -116,7 +129,7 @@ type VectorizeMatch = Pick, "values"> & }; /** - * A set of vector {@link VectorizeMatch} for a particular query. + * A set of matching {@link VectorizeMatch} for a particular query. */ interface VectorizeMatches { matches: VectorizeMatch[]; @@ -126,6 +139,9 @@ interface VectorizeMatches { /** * Results of an operation that performed a mutation on a set of vectors. * Here, `ids` is a list of vectors that were successfully processed. + * + * This type is exclusively for the Vectorize **beta** and will be deprecated once Vectorize RC is released. + * See {@link VectorizeAsyncMutation} for its post-beta equivalent. */ interface VectorizeVectorMutation { /* List of ids of vectors that were successfully processed. */ @@ -135,16 +151,20 @@ interface VectorizeVectorMutation { } /** -* Results of an operation that performed a mutation on a set of vectors -* with the v2 version of Vectorize. -* Here, `mutationId` is the identifier for the last mutation processed by Vectorize. -*/ -interface VectorizeVectorMutationV2 { - /* The identifier for the last mutation processed by Vectorize. */ + * Result type indicating a mutation on the Vectorize Index. + * Actual mutations are processed async where the `mutationId` is the unique identifier for the operation. + */ +interface VectorizeAsyncMutation { + /** The unique identifier for the async mutation operation containing the changeset. */ mutationId: string; } - +/** + * A Vectorize Vector Search Index for querying vectors/embeddings. + * + * This type is exclusively for the Vectorize **beta** and will be deprecated once Vectorize RC is released. + * See {@link Vectorize} for its new implementation. + */ declare abstract class VectorizeIndex { /** * Get information about the currently bound index. @@ -186,3 +206,50 @@ declare abstract class VectorizeIndex { */ public getByIds(ids: string[]): Promise; } + +/** + * A Vectorize Vector Search Index for querying vectors/embeddings. + * + * Mutations in this version are async, returning a mutation id. + */ +declare abstract class Vectorize { + /** + * Get information about the currently bound index. + * @returns A promise that resolves with information about the current index. + */ + public describe(): Promise; + /** + * Use the provided vector to perform a similarity search across the index. + * @param vector Input vector that will be used to drive the similarity search. + * @param options Configuration options to massage the returned data. + * @returns A promise that resolves with matched and scored vectors. + */ + public query( + vector: VectorFloatArray | number[], + options: VectorizeQueryOptions + ): Promise; + /** + * Insert a list of vectors into the index dataset. If a provided id exists, an error will be thrown. + * @param vectors List of vectors that will be inserted. + * @returns A promise that resolves with a unique identifier of a mutation containing the insert changeset. + */ + public insert(vectors: VectorizeVector[]): Promise; + /** + * Upsert a list of vectors into the index dataset. If a provided id exists, it will be replaced with the new values. + * @param vectors List of vectors that will be upserted. + * @returns A promise that resolves with a unique identifier of a mutation containing the upsert changeset. + */ + public upsert(vectors: VectorizeVector[]): Promise; + /** + * Delete a list of vectors with a matching id. + * @param ids List of vector ids that should be deleted. + * @returns A promise that resolves with a unique identifier of a mutation containing the delete changeset. + */ + public deleteByIds(ids: string[]): Promise; + /** + * Get a list of vectors with a matching id. + * @param ids List of vector ids that should be returned. + * @returns A promise that resolves with the raw unscored vectors matching the id set. + */ + public getByIds(ids: string[]): Promise; +} diff --git a/src/cloudflare/vectorize.ts b/src/cloudflare/vectorize.ts index 7a8ab580e6f..93ca2105cd0 100644 --- a/src/cloudflare/vectorize.ts +++ b/src/cloudflare/vectorize.ts @@ -7,11 +7,11 @@ * These can be supplied in place of configuring explicit dimensions. */ export enum KnownModel { - 'openai/text-embedding-ada-002' = 'openai/text-embedding-ada-002', - 'cohere/embed-multilingual-v2.0' = 'cohere/embed-multilingual-v2.0', - '@cf/baai/bge-small-en-v1.5' = '@cf/baai/bge-small-en-v1.5', - '@cf/baai/bge-base-en-v1.5' = '@cf/baai/bge-base-en-v1.5', - '@cf/baai/bge-large-en-v1.5' = '@cf/baai/bge-large-en-v1.5', + "openai/text-embedding-ada-002" = "openai/text-embedding-ada-002", + "cohere/embed-multilingual-v2.0" = "cohere/embed-multilingual-v2.0", + "@cf/baai/bge-small-en-v1.5" = "@cf/baai/bge-small-en-v1.5", + "@cf/baai/bge-base-en-v1.5" = "@cf/baai/bge-base-en-v1.5", + "@cf/baai/bge-large-en-v1.5" = "@cf/baai/bge-large-en-v1.5", } /** @@ -23,3 +23,24 @@ export enum DistanceMetric { COSINE = "cosine", DOT_PRODUCT = "dot-product", } + +/** + * Supported metadata return levels for a Vectorize query. + */ +export enum MetadataRetrievalLevel { + /** + * Full metadata for the vector return set, including all fields (including those un-indexed) without truncation. + * + * This is a more expensive retrieval, as it requires additional fetching & reading of un-indexed data. + */ + ALL = "all", + /** + * Return all metadata fields configured for indexing in the vector return set. + * + * This level of retrieval is "free" in that no additional overhead is incurred returning this data. + * However, note that indexed metadata is subject to truncation (especially for larger strings). + */ + INDEXED = "indexed", + /** No indexed metadata will be returned. */ + NONE = "none", +} diff --git a/types/defines/vectorize.d.ts b/types/defines/vectorize.d.ts index 59e12dfdf22..0d14327237f 100644 --- a/types/defines/vectorize.d.ts +++ b/types/defines/vectorize.d.ts @@ -48,11 +48,24 @@ type VectorizeVectorMetadataFilter = { */ type VectorizeDistanceMetric = "euclidean" | "cosine" | "dot-product"; -interface VectorizeQueryOptions { +/** + * Metadata return levels for a Vectorize query. + * + * Default to "none". + * + * @property all Full metadata for the vector return set, including all fields (including those un-indexed) without truncation. This is a more expensive retrieval, as it requires additional fetching & reading of un-indexed data. + * @property indexed Return all metadata fields configured for indexing in the vector return set. This level of retrieval is "free" in that no additional overhead is incurred returning this data. However, note that indexed metadata is subject to truncation (especially for larger strings). + * @property none No indexed metadata will be returned. + */ +type VectorizeMetadataRetrievalLevel = "all" | "indexed" | "none"; + +interface VectorizeQueryOptions< + MetadataReturn extends boolean | VectorizeMetadataRetrievalLevel = boolean, +> { topK?: number; namespace?: string; returnValues?: boolean; - returnMetadata?: boolean; + returnMetadata?: MetadataReturn; filter?: VectorizeVectorMetadataFilter; } @@ -94,7 +107,7 @@ interface VectorizeVector { values: VectorFloatArray | number[]; /** The namespace this vector belongs to. */ namespace?: string; - /** Metadata associated with the vector. Includes the values of the other fields and potentially additional details. */ + /** Metadata associated with the vector. Includes the values of other fields and potentially additional details. */ metadata?: Record; } @@ -108,7 +121,7 @@ type VectorizeMatch = Pick, "values"> & }; /** - * A set of vector {@link VectorizeMatch} for a particular query. + * A set of matching {@link VectorizeMatch} for a particular query. */ interface VectorizeMatches { matches: VectorizeMatch[]; @@ -118,6 +131,9 @@ interface VectorizeMatches { /** * Results of an operation that performed a mutation on a set of vectors. * Here, `ids` is a list of vectors that were successfully processed. + * + * This type is exclusively for the Vectorize **beta** and will be deprecated once Vectorize RC is released. + * See {@link VectorizeAsyncMutation} for its post-beta equivalent. */ interface VectorizeVectorMutation { /* List of ids of vectors that were successfully processed. */ @@ -127,15 +143,20 @@ interface VectorizeVectorMutation { } /** -* Results of an operation that performed a mutation on a set of vectors -* with the v2 version of Vectorize. -* Here, `mutationId` is the identifier for the last mutation processed by Vectorize. -*/ -interface VectorizeVectorMutationV2 { - /* The identifier for the last mutation processed by Vectorize. */ + * Result type indicating a mutation on the Vectorize Index. + * Actual mutations are processed async where the `mutationId` is the unique identifier for the operation. + */ +interface VectorizeAsyncMutation { + /** The unique identifier for the async mutation operation containing the changeset. */ mutationId: string; } +/** + * A Vectorize Vector Search Index for querying vectors/embeddings. + * + * This type is exclusively for the Vectorize **beta** and will be deprecated once Vectorize RC is released. + * See {@link Vectorize} for its new implementation. + */ declare abstract class VectorizeIndex { /** * Get information about the currently bound index. @@ -177,3 +198,50 @@ declare abstract class VectorizeIndex { */ public getByIds(ids: string[]): Promise; } + +/** + * A Vectorize Vector Search Index for querying vectors/embeddings. + * + * Mutations in this version are async, returning a mutation id. + */ +declare abstract class Vectorize { + /** + * Get information about the currently bound index. + * @returns A promise that resolves with information about the current index. + */ + public describe(): Promise; + /** + * Use the provided vector to perform a similarity search across the index. + * @param vector Input vector that will be used to drive the similarity search. + * @param options Configuration options to massage the returned data. + * @returns A promise that resolves with matched and scored vectors. + */ + public query( + vector: VectorFloatArray | number[], + options: VectorizeQueryOptions + ): Promise; + /** + * Insert a list of vectors into the index dataset. If a provided id exists, an error will be thrown. + * @param vectors List of vectors that will be inserted. + * @returns A promise that resolves with a unique identifier of a mutation containing the insert changeset. + */ + public insert(vectors: VectorizeVector[]): Promise; + /** + * Upsert a list of vectors into the index dataset. If a provided id exists, it will be replaced with the new values. + * @param vectors List of vectors that will be upserted. + * @returns A promise that resolves with a unique identifier of a mutation containing the upsert changeset. + */ + public upsert(vectors: VectorizeVector[]): Promise; + /** + * Delete a list of vectors with a matching id. + * @param ids List of vector ids that should be deleted. + * @returns A promise that resolves with a unique identifier of a mutation containing the delete changeset. + */ + public deleteByIds(ids: string[]): Promise; + /** + * Get a list of vectors with a matching id. + * @param ids List of vector ids that should be returned. + * @returns A promise that resolves with the raw unscored vectors matching the id set. + */ + public getByIds(ids: string[]): Promise; +}