Skip to content

Commit

Permalink
Update Vectorize types for GA release (#2421)
Browse files Browse the repository at this point in the history
* Update Vectorize types for GA release

* Update resources in Dev Container
  • Loading branch information
ndisidore authored Jul 24, 2024
1 parent af3ee84 commit 4f8ddda
Show file tree
Hide file tree
Showing 6 changed files with 212 additions and 59 deletions.
8 changes: 4 additions & 4 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ ENV NODE_PATH ${NODE_HOME}/lib/node_modules
ENV PATH ${NODE_HOME}/bin:$PATH

# Install Bazel (via Bazelisk)
ARG BAZELISK_VERSION=v1.19.0
ARG BAZELISK_DOWNLOAD_SHA="d28b588ac0916abd6bf02defb5433f6eddf7cba35ffa808eabb65a44aab226f7"
ARG BAZELISK_VERSION=v1.20.0
ARG BAZELISK_DOWNLOAD_SHA="d9af1fa808c0529753c3befda75123236a711d971d3485a390507122148773a3"
RUN curl -fSsL -o /usr/local/bin/bazel https://github.com/bazelbuild/bazelisk/releases/download/${BAZELISK_VERSION}/bazelisk-linux-amd64 \
&& echo "${BAZELISK_DOWNLOAD_SHA} /usr/local/bin/bazel" | sha256sum --check \
&& chmod 0755 /usr/local/bin/bazel

# Install Bazel Buildifier
ARG BUILDIFER_VERSION=v6.4.0
ARG BUILDIFER_DOWNLOAD_SHA="be63db12899f48600bad94051123b1fd7b5251e7661b9168582ce52396132e92"
ARG BUILDIFER_VERSION=v7.1.2
ARG BUILDIFER_DOWNLOAD_SHA="28285fe7e39ed23dc1a3a525dfcdccbc96c0034ff1d4277905d2672a71b38f13"
RUN curl -fSsL -o /usr/local/bin/buildifier https://github.com/bazelbuild/buildtools/releases/download/${BUILDIFER_VERSION}/buildifier-linux-amd64 \
&& echo "${BUILDIFER_DOWNLOAD_SHA} /usr/local/bin/buildifier" | sha256sum --check \
&& chmod 0755 /usr/local/bin/buildifier
Expand Down
15 changes: 6 additions & 9 deletions src/cloudflare/internal/test/vectorize/vectorize-api-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import * as assert from "node:assert";
import { KnownModel, DistanceMetric } from "cloudflare:vectorize";

/**
* @typedef {{'vector-search': VectorizeIndex}} Env
* @typedef {{'vector-search': Vectorize}} Env
*
*/

Expand All @@ -23,10 +23,10 @@ export const test_vector_search_vector_query = {
const results = await IDX.query(new Float32Array(new Array(5).fill(0)), {
topK: 3,
returnValues: true,
returnMetadata: true,
returnMetadata: "indexed",
});
assert.equal(true, results.count > 0);
/** @type {VectorizeMatches} */
/** @type {VectorizeQueryMatches} */
const expected = {
matches: [
{
Expand Down Expand Up @@ -130,7 +130,7 @@ export const test_vector_search_vector_insert = {
},
];
const results = await IDX.insert(newVectors);
assert.equal(results.count, 5);
assert.equal(results.mutationId, `total vectors: 5`);
}
},
};
Expand Down Expand Up @@ -189,7 +189,7 @@ export const test_vector_search_vector_upsert = {
},
];
const results = await IDX.upsert(newVectors);
assert.equal(results.count, 4);
assert.equal(results.mutationId, `total vectors: 4`);
}
},
};
Expand All @@ -207,10 +207,7 @@ export const test_vector_search_vector_delete_ids = {
"vector-b",
"vector-c",
]);
assert.deepStrictEqual(results, {
ids: ["vector-a", "vector-b", "vector-c"],
count: 3,
});
assert.equal(results.mutationId, `deleted vectors: 3`);
}
},
};
Expand Down
40 changes: 20 additions & 20 deletions src/cloudflare/internal/test/vectorize/vectorize-mock.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Licensed under the Apache 2.0 license found in the LICENSE file or at:
// https://opensource.org/licenses/Apache-2.0

/** @type {Array<VectorizeMatch>} */
/** @type {Array<VectorizeQueryMatch>} */
const exampleVectorMatches = [
{
id: "b0daca4a-ffd8-4865-926b-e24800af2a2d",
Expand Down Expand Up @@ -130,7 +130,7 @@ export default {
} else if (request.method === "POST" && pathname.endsWith("/insert")) {
/** @type {{vectors: Array<VectorizeVector>}} */
const data = await request.json();
if (data.vectors.find((v) => v.id == "fail-with-test-error")) {
if (data.vectors.find((v) => v.id === "fail-with-test-error")) {
return Response.json(
{
code: 9999,
Expand All @@ -142,34 +142,34 @@ export default {
);
}

return Response.json({
ids: [
...data.vectors.map(({ id }) => id),
...exampleVectors.map(({ id }) => id),
],
count: data.vectors.length + exampleVectors.length,
});
/** @type {VectorizeAsyncMutation} */
const res = {
// fudge a bit and set the mutation id to some internals so our asserts can check more
mutationId: `total vectors: ${data.vectors.length + exampleVectors.length}`,
};
return Response.json(res);
} else if (request.method === "POST" && pathname.endsWith("/upsert")) {
/** @type {{vectors: Array<VectorizeVector>}} */
let data = await request.json();
if (data.vectors.length > 1) data.vectors.splice(-1);
return Response.json({
ids: [
...data.vectors.map(({ id }) => id),
...exampleVectors.map(({ id }) => id),
],
count: data.vectors.length + exampleVectors.length,
});
/** @type {VectorizeAsyncMutation} */
const res = {
// fudge a bit and set the mutation id to some internals so our asserts can check more
mutationId: `total vectors: ${data.vectors.length + exampleVectors.length}`,
};
return Response.json(res);
} else if (
request.method === "POST" &&
pathname.endsWith("/deleteByIds")
) {
/** @type {{ids: Array<string>}} */
const body = await request.json();
return Response.json({
ids: body.ids,
count: body.ids.length,
});
/** @type {VectorizeAsyncMutation} */
const res = {
// fudge a bit and set the mutation id to some internals so our asserts can check more
mutationId: `deleted vectors: ${body.ids.length}`,
};
return Response.json(res);
} else if (request.method === "POST" && pathname.endsWith("/getByIds")) {
/** @type {{ids: Array<string>}} */
const body = await request.json();
Expand Down
89 changes: 78 additions & 11 deletions src/cloudflare/internal/vectorize.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,24 @@ type VectorizeVectorMetadataFilter = {
*/
type VectorizeDistanceMetric = "euclidean" | "cosine" | "dot-product";

interface VectorizeQueryOptions {
/**
* Metadata return levels for a Vectorize query.
*
* Defaults to "none".
*
* @property all Full metadata for the vector return set, including all fields (including those un-indexed) without truncation. This is a more expensive retrieval, as it requires additional fetching & reading of un-indexed data.
* @property indexed Return all metadata fields configured for indexing in the vector return set. This level of retrieval is "free" in that no additional overhead is incurred returning this data. However, note that indexed metadata is subject to truncation (especially for larger strings).
* @property none No indexed metadata will be returned.
*/
type VectorizeMetadataRetrievalLevel = "all" | "indexed" | "none";

interface VectorizeQueryOptions<
MetadataReturn extends boolean | VectorizeMetadataRetrievalLevel = boolean,
> {
topK?: number;
namespace?: string;
returnValues?: boolean;
returnMetadata?: boolean;
returnMetadata?: MetadataReturn;
filter?: VectorizeVectorMetadataFilter;
}

Expand Down Expand Up @@ -102,7 +115,7 @@ interface VectorizeVector {
values: VectorFloatArray | number[];
/** The namespace this vector belongs to. */
namespace?: string;
/** Metadata associated with the vector. Includes the values of the other fields and potentially additional details. */
/** Metadata associated with the vector. Includes the values of other fields and potentially additional details. */
metadata?: Record<string, VectorizeVectorMetadata>;
}

Expand All @@ -116,7 +129,7 @@ type VectorizeMatch = Pick<Partial<VectorizeVector>, "values"> &
};

/**
* A set of vector {@link VectorizeMatch} for a particular query.
* A set of matching {@link VectorizeMatch} for a particular query.
*/
interface VectorizeMatches {
matches: VectorizeMatch[];
Expand All @@ -126,6 +139,9 @@ interface VectorizeMatches {
/**
* Results of an operation that performed a mutation on a set of vectors.
* Here, `ids` is a list of ids of vectors that were successfully processed.
*
* This type is exclusively for the Vectorize **beta** and will be deprecated once Vectorize RC is released.
* See {@link VectorizeAsyncMutation} for its post-beta equivalent.
*/
interface VectorizeVectorMutation {
/* List of ids of vectors that were successfully processed. */
Expand All @@ -135,16 +151,20 @@ interface VectorizeVectorMutation {
}

/**
* Results of an operation that performed a mutation on a set of vectors
* with the v2 version of Vectorize.
* Here, `mutationId` is the identifier for the last mutation processed by Vectorize.
*/
interface VectorizeVectorMutationV2 {
/* The identifier for the last mutation processed by Vectorize. */
* Result type indicating a mutation on the Vectorize Index.
* Actual mutations are processed async where the `mutationId` is the unique identifier for the operation.
*/
interface VectorizeAsyncMutation {
/**
* The unique identifier for the async mutation operation containing the changeset.
* The mutation itself is processed asynchronously; this id identifies that operation.
*/
mutationId: string;
}


/**
* A Vectorize Vector Search Index for querying vectors/embeddings.
*
* This type is exclusively for the Vectorize **beta** and will be deprecated once Vectorize RC is released.
* See {@link Vectorize} for its new implementation.
*/
declare abstract class VectorizeIndex {
/**
* Get information about the currently bound index.
Expand Down Expand Up @@ -186,3 +206,50 @@ declare abstract class VectorizeIndex {
*/
public getByIds(ids: string[]): Promise<VectorizeVector[]>;
}

/**
* A Vectorize Vector Search Index for querying vectors/embeddings.
*
* Mutations in this version are async: `insert`, `upsert` and `deleteByIds`
* resolve with a {@link VectorizeAsyncMutation} carrying a mutation id.
*
* See {@link VectorizeIndex} for the beta-only counterpart of this class.
*/
declare abstract class Vectorize {
/**
* Get information about the currently bound index.
* @returns A promise that resolves with information about the current index.
*/
public describe(): Promise<VectorizeIndexDetails>;
/**
* Use the provided vector to perform a similarity search across the index.
* @param vector Input vector that will be used to drive the similarity search.
* @param options Configuration options to massage the returned data. The argument is required
*   by this signature. Its `returnMetadata` field is typed as a
*   {@link VectorizeMetadataRetrievalLevel} ("all" | "indexed" | "none") here, not as a boolean.
* @returns A promise that resolves with matched and scored vectors.
*/
public query(
vector: VectorFloatArray | number[],
options: VectorizeQueryOptions<VectorizeMetadataRetrievalLevel>
): Promise<VectorizeMatches>;
/**
* Insert a list of vectors into the index dataset. If a provided id exists, an error will be thrown.
* @param vectors List of vectors that will be inserted.
* @returns A promise that resolves with a unique identifier of a mutation containing the insert changeset.
*/
public insert(vectors: VectorizeVector[]): Promise<VectorizeAsyncMutation>;
/**
* Upsert a list of vectors into the index dataset. If a provided id exists, it will be replaced with the new values.
* @param vectors List of vectors that will be upserted.
* @returns A promise that resolves with a unique identifier of a mutation containing the upsert changeset.
*/
public upsert(vectors: VectorizeVector[]): Promise<VectorizeAsyncMutation>;
/**
* Delete a list of vectors with a matching id.
* @param ids List of vector ids that should be deleted.
* @returns A promise that resolves with a unique identifier of a mutation containing the delete changeset.
*/
public deleteByIds(ids: string[]): Promise<VectorizeAsyncMutation>;
/**
* Get a list of vectors with a matching id.
* @param ids List of vector ids that should be returned.
* @returns A promise that resolves with the raw unscored vectors matching the id set.
*/
public getByIds(ids: string[]): Promise<VectorizeVector[]>;
}
31 changes: 26 additions & 5 deletions src/cloudflare/vectorize.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
* These can be supplied in place of configuring explicit dimensions.
*/
export enum KnownModel {
'openai/text-embedding-ada-002' = 'openai/text-embedding-ada-002',
'cohere/embed-multilingual-v2.0' = 'cohere/embed-multilingual-v2.0',
'@cf/baai/bge-small-en-v1.5' = '@cf/baai/bge-small-en-v1.5',
'@cf/baai/bge-base-en-v1.5' = '@cf/baai/bge-base-en-v1.5',
'@cf/baai/bge-large-en-v1.5' = '@cf/baai/bge-large-en-v1.5',
"openai/text-embedding-ada-002" = "openai/text-embedding-ada-002",
"cohere/embed-multilingual-v2.0" = "cohere/embed-multilingual-v2.0",
"@cf/baai/bge-small-en-v1.5" = "@cf/baai/bge-small-en-v1.5",
"@cf/baai/bge-base-en-v1.5" = "@cf/baai/bge-base-en-v1.5",
"@cf/baai/bge-large-en-v1.5" = "@cf/baai/bge-large-en-v1.5",
}

/**
Expand All @@ -23,3 +23,24 @@ export enum DistanceMetric {
COSINE = "cosine",
DOT_PRODUCT = "dot-product",
}

/**
* Supported metadata return levels for a Vectorize query.
*
* The string values of the members are the same literals that make up the
* `VectorizeMetadataRetrievalLevel` union ("all" | "indexed" | "none").
*/
export enum MetadataRetrievalLevel {
/**
* Full metadata for the vector return set, including all fields (including those un-indexed) without truncation.
*
* This is a more expensive retrieval, as it requires additional fetching & reading of un-indexed data.
*/
ALL = "all",
/**
* Return all metadata fields configured for indexing in the vector return set.
*
* This level of retrieval is "free" in that no additional overhead is incurred returning this data.
* However, note that indexed metadata is subject to truncation (especially for larger strings).
*/
INDEXED = "indexed",
/** No indexed metadata will be returned. */
NONE = "none",
}
Loading

0 comments on commit 4f8ddda

Please sign in to comment.