Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(cosmosdbnosql): Add Semantic Cache Integration #7033

Merged
merged 36 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
ea34aa7
add semantic cache
aditishree1 Oct 20, 2024
ceb00af
add test
aditishree1 Oct 20, 2024
0ee6a31
Merge branch 'langchain-ai:main' into add-semanticCache
aditishree1 Oct 21, 2024
d306cf9
format and fix lint errors
aditishree1 Oct 21, 2024
75f496b
fix example
aditishree1 Oct 21, 2024
72e790d
add integration tests
aditishree1 Oct 21, 2024
77f0b0b
set default db and container name
aditishree1 Oct 21, 2024
421a60c
fix int test
aditishree1 Oct 21, 2024
b753e8e
Merge branch 'langchain-ai:main' into add-semanticCache
aditishree1 Oct 22, 2024
2aa22b2
add semantic cache
aditishree1 Oct 20, 2024
1023ddb
add test
aditishree1 Oct 20, 2024
8482390
format and fix lint errors
aditishree1 Oct 21, 2024
7dc12d0
fix example
aditishree1 Oct 21, 2024
116624f
add integration tests
aditishree1 Oct 21, 2024
3914e11
set default db and container name
aditishree1 Oct 21, 2024
e3a0077
fix int test
aditishree1 Oct 21, 2024
fc7ead0
add docs and example
aditishree1 Oct 22, 2024
b21e09b
Merge branch 'add-semanticCache' of https://github.com/aditishree1/la…
aditishree1 Oct 22, 2024
1de4cb3
update doc
aditishree1 Oct 22, 2024
cfff31a
update link
aditishree1 Oct 22, 2024
5f3397b
Revert "update link"
aditishree1 Oct 22, 2024
2ef6d9a
update link
aditishree1 Oct 22, 2024
e0533e5
Update libs/langchain-azure-cosmosdb/src/caches.ts
aditishree1 Oct 24, 2024
dc35153
Update libs/langchain-azure-cosmosdb/src/caches.ts
aditishree1 Oct 24, 2024
75b9d6f
update user-agent
aditishree1 Oct 24, 2024
4800919
update user agent
aditishree1 Oct 24, 2024
369b199
fix docs
aditishree1 Oct 25, 2024
079eb03
add link in doc and remove defaultdb from cache
aditishree1 Oct 29, 2024
f41d737
Move documentation pages
jacoblee93 Oct 29, 2024
193fe00
Merge branch 'main' of github.com:langchain-ai/langchainjs into 7033
jacoblee93 Oct 29, 2024
f0a3f0c
add similarity Score threshold
aditishree1 Oct 30, 2024
1bec699
Update examples/src/caches/azure_cosmosdb_nosql/azure_cosmosdb_nosql.ts
aditishree1 Oct 30, 2024
2156145
update tests
aditishree1 Nov 1, 2024
26bccdf
Merge remote-tracking branch 'origin' into add-semanticCache
aditishree1 Nov 1, 2024
6c20e33
Format
jacoblee93 Nov 5, 2024
be89aba
Merge branch 'main' into add-semanticCache
jacoblee93 Nov 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion examples/src/caches/azure_cosmosdb_nosql/azure_cosmosdb_nosql.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,21 @@ const config: AzureCosmosDBNoSQLConfig = {
// use endpoint to initiate client with managed identity
connectionString: "<CONNECTION_STRING>",
};
const cache = new AzureCosmosDBNoSQLSemanticCache(embeddings, config);

/**
* Sets the threshold similarity score for returning cached results based on vector distance.
* Cached output is returned only if the similarity score meets or exceeds this threshold;
* otherwise, a new result is generated. Default is 0.6, adjustable via the constructor
* to suit various distance functions and use cases.
* (see: https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/query/vectordistance).
*/

const similarityScoreThreshold = 0.5;
const cache = new AzureCosmosDBNoSQLSemanticCache(
embeddings,
config,
similarityScoreThreshold
);

const model = new ChatOpenAI({ cache });

Expand Down
34 changes: 27 additions & 7 deletions libs/langchain-azure-cosmosdb/src/caches.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,16 @@ export class AzureCosmosDBNoSQLSemanticCache extends BaseCache {

private config: AzureCosmosDBNoSQLConfig;

private similarityScoreThreshold: number;

private cacheDict: { [key: string]: AzureCosmosDBNoSQLVectorStore } = {};

private vectorDistanceFunction: string;

constructor(
embeddings: EmbeddingsInterface,
dbConfig: AzureCosmosDBNoSQLConfig
dbConfig: AzureCosmosDBNoSQLConfig,
similarityScoreThreshold: number = 0.6
) {
super();
let client: CosmosClient;
Expand Down Expand Up @@ -88,13 +93,18 @@ export class AzureCosmosDBNoSQLSemanticCache extends BaseCache {
client = dbConfig.client;
}

this.vectorDistanceFunction =
dbConfig.vectorEmbeddingPolicy?.vectorEmbeddings[0].distanceFunction ??
"cosine";

this.config = {
...dbConfig,
client,
databaseName: dbConfig.databaseName,
containerName: dbConfig.containerName ?? DEFAULT_CONTAINER_NAME,
};
this.embeddings = embeddings;
this.similarityScoreThreshold = similarityScoreThreshold;
}

private getLlmCache(llmKey: string) {
Expand All @@ -118,14 +128,24 @@ export class AzureCosmosDBNoSQLSemanticCache extends BaseCache {
/**
 * Looks up a cached LLM result for a semantically similar prompt.
 *
 * Performs a top-1 vector similarity search against the cache container and
 * only returns the cached generations when the similarity score passes the
 * configured threshold; otherwise returns null so the caller generates fresh
 * output.
 *
 * @param prompt - The prompt to search the cache with.
 * @param llmKey - Key identifying the LLM whose cache container is queried.
 * @returns The deserialized cached generations, or null on a cache miss.
 */
public async lookup(prompt: string, llmKey: string) {
const llmCache = this.getLlmCache(llmKey);

// Fetch the single closest cached entry together with its similarity score.
const results = await llmCache.similaritySearchWithScore(prompt, 1);
if (!results.length) return null;

const generations = results
.flatMap(([document, score]) => {
// For the euclidean distance function a LOWER score means MORE similar,
// so the threshold is an upper bound; for cosine/dotproduct a HIGHER
// score means more similar, so the threshold is a lower bound.
const isSimilar =
(this.vectorDistanceFunction === "euclidean" &&
score <= this.similarityScoreThreshold) ||
(this.vectorDistanceFunction !== "euclidean" &&
score >= this.similarityScoreThreshold);

if (!isSimilar) return undefined;

// Each cached document stores its generations serialized as JSON strings.
return document.metadata.return_value.map((gen: string) =>
deserializeStoredGeneration(JSON.parse(gen))
);
})
.filter((gen) => gen !== undefined);

return generations.length > 0 ? generations : null;
}
Expand Down
157 changes: 113 additions & 44 deletions libs/langchain-azure-cosmosdb/src/tests/caches.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ import {
IndexingMode,
VectorEmbeddingPolicy,
} from "@azure/cosmos";
import { FakeEmbeddings, FakeLLM } from "@langchain/core/utils/testing";
import { DefaultAzureCredential } from "@azure/identity";
import { ChatOpenAI, OpenAIEmbeddings } from "@langchain/openai";
import { AzureCosmosDBNoSQLSemanticCache } from "../caches.js";

const DATABASE_NAME = "langchainTestCacheDB";
Expand All @@ -31,33 +31,42 @@ function vectorEmbeddingPolicy(
path: "/embedding",
dataType: "float32",
distanceFunction,
dimensions: 0,
dimensions: 505,
},
],
};
}

function initializeCache(
indexType: any,
distanceFunction: any
distanceFunction: any,
similarityThreshold?: number
): AzureCosmosDBNoSQLSemanticCache {
let cache: AzureCosmosDBNoSQLSemanticCache;
if (process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING) {
cache = new AzureCosmosDBNoSQLSemanticCache(new FakeEmbeddings(), {
databaseName: DATABASE_NAME,
containerName: CONTAINER_NAME,
connectionString: process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING,
indexingPolicy: indexingPolicy(indexType),
vectorEmbeddingPolicy: vectorEmbeddingPolicy(distanceFunction),
});
cache = new AzureCosmosDBNoSQLSemanticCache(
new OpenAIEmbeddings(),
{
databaseName: DATABASE_NAME,
containerName: CONTAINER_NAME,
connectionString: process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING,
indexingPolicy: indexingPolicy(indexType),
vectorEmbeddingPolicy: vectorEmbeddingPolicy(distanceFunction),
},
similarityThreshold
);
} else if (process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT) {
cache = new AzureCosmosDBNoSQLSemanticCache(new FakeEmbeddings(), {
databaseName: DATABASE_NAME,
containerName: CONTAINER_NAME,
endpoint: process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT,
indexingPolicy: indexingPolicy(indexType),
vectorEmbeddingPolicy: vectorEmbeddingPolicy(distanceFunction),
});
cache = new AzureCosmosDBNoSQLSemanticCache(
new OpenAIEmbeddings(),
{
databaseName: DATABASE_NAME,
containerName: CONTAINER_NAME,
endpoint: process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT,
indexingPolicy: indexingPolicy(indexType),
vectorEmbeddingPolicy: vectorEmbeddingPolicy(distanceFunction),
},
similarityThreshold
);
} else {
throw new Error(
"Please set the environment variable AZURE_COSMOSDB_NOSQL_CONNECTION_STRING or AZURE_COSMOSDB_NOSQL_ENDPOINT"
Expand All @@ -78,6 +87,10 @@ function initializeCache(
* Once you have the instance running, you need to set the following environment
* variables before running the test:
* - AZURE_COSMOSDB_NOSQL_CONNECTION_STRING or AZURE_COSMOSDB_NOSQL_ENDPOINT
* - AZURE_OPENAI_API_KEY
* - AZURE_OPENAI_API_INSTANCE_NAME
* - AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME
* - AZURE_OPENAI_API_VERSION
*/
describe("Azure CosmosDB NoSQL Semantic Cache", () => {
beforeEach(async () => {
Expand Down Expand Up @@ -105,58 +118,114 @@ describe("Azure CosmosDB NoSQL Semantic Cache", () => {
// Ignore error if the database does not exist
}
});

it("test AzureCosmosDBNoSqlSemanticCache with cosine quantizedFlat", async () => {
// Cache backed by a quantizedFlat index using the cosine distance function.
const cache = initializeCache("quantizedFlat", "cosine");
const model = new ChatOpenAI({ cache });
const llmString = JSON.stringify(model._identifyingParams);
await cache.update("foo", llmString, [{ text: "fizz" }]);

// An identical prompt must hit the cache.
let cacheOutput = await cache.lookup("foo", llmString);
expect(cacheOutput).toEqual([{ text: "fizz" }]);

// A dissimilar prompt must miss the cache.
cacheOutput = await cache.lookup("bar", llmString);
expect(cacheOutput).toEqual(null);

await cache.clear(llmString);
});

it("test AzureCosmosDBNoSqlSemanticCache with cosine flat", async () => {
// Cache backed by a flat index using the cosine distance function.
const cache = initializeCache("flat", "cosine");
const model = new ChatOpenAI({ cache });
const llmString = JSON.stringify(model._identifyingParams);
await cache.update("foo", llmString, [{ text: "fizz" }]);

// An identical prompt must hit the cache.
let cacheOutput = await cache.lookup("foo", llmString);
expect(cacheOutput).toEqual([{ text: "fizz" }]);

// A dissimilar prompt must miss the cache.
cacheOutput = await cache.lookup("bar", llmString);
expect(cacheOutput).toEqual(null);

await cache.clear(llmString);
});

it("test AzureCosmosDBNoSqlSemanticCache with dotProduct quantizedFlat", async () => {
// Cache backed by a quantizedFlat index using the dotproduct distance function.
const cache = initializeCache("quantizedFlat", "dotproduct");
const model = new ChatOpenAI({ cache });
const llmString = JSON.stringify(model._identifyingParams);
await cache.update("foo", llmString, [{ text: "fizz" }]);

// An identical prompt must hit the cache.
let cacheOutput = await cache.lookup("foo", llmString);
expect(cacheOutput).toEqual([{ text: "fizz" }]);

// A dissimilar prompt must miss the cache.
cacheOutput = await cache.lookup("bar", llmString);
expect(cacheOutput).toEqual(null);

await cache.clear(llmString);
});

it("test AzureCosmosDBNoSqlSemanticCache with dotProduct flat", async () => {
// BUG FIX: this test is named "dotProduct flat" but initialized the cache
// with the cosine distance function, duplicating the "cosine flat" test and
// leaving the dotproduct/flat combination untested.
const cache = initializeCache("flat", "dotproduct");
const model = new ChatOpenAI({ cache });
const llmString = JSON.stringify(model._identifyingParams);
await cache.update("foo", llmString, [{ text: "fizz" }]);

// An identical prompt must hit the cache.
let cacheOutput = await cache.lookup("foo", llmString);
expect(cacheOutput).toEqual([{ text: "fizz" }]);

// A dissimilar prompt must miss the cache.
cacheOutput = await cache.lookup("bar", llmString);
expect(cacheOutput).toEqual(null);

await cache.clear(llmString);
});

it("test AzureCosmosDBNoSqlSemanticCache with euclidean quantizedFlat", async () => {
// Cache backed by a quantizedFlat index using the euclidean distance function
// (lower score = more similar; lookup treats the threshold as an upper bound).
const cache = initializeCache("quantizedFlat", "euclidean");
const model = new ChatOpenAI({ cache });
const llmString = JSON.stringify(model._identifyingParams);
await cache.update("foo", llmString, [{ text: "fizz" }]);

// An identical prompt must hit the cache.
let cacheOutput = await cache.lookup("foo", llmString);
expect(cacheOutput).toEqual([{ text: "fizz" }]);

// A dissimilar prompt must miss the cache.
cacheOutput = await cache.lookup("bar", llmString);
expect(cacheOutput).toEqual(null);

await cache.clear(llmString);
});

it("test AzureCosmosDBNoSqlSemanticCache with euclidean flat", async () => {
// Cache backed by a flat index using the euclidean distance function
// (lower score = more similar; lookup treats the threshold as an upper bound).
const cache = initializeCache("flat", "euclidean");
const model = new ChatOpenAI({ cache });
const llmString = JSON.stringify(model._identifyingParams);
await cache.update("foo", llmString, [{ text: "fizz" }]);

// An identical prompt must hit the cache.
let cacheOutput = await cache.lookup("foo", llmString);
expect(cacheOutput).toEqual([{ text: "fizz" }]);

// A dissimilar prompt must miss the cache.
cacheOutput = await cache.lookup("bar", llmString);
expect(cacheOutput).toEqual(null);

await cache.clear(llmString);
});

it("test AzureCosmosDBNoSqlSemanticCache response according to similarity score", async () => {
// Uses the default similarity threshold (0.6) with the cosine distance function.
const cache = initializeCache("quantizedFlat", "cosine");
const model = new ChatOpenAI({ cache });
const response1 = await model.invoke(
"Where is the headquarter of Microsoft?"
);
console.log(response1.content);
// This prompt reportedly scores about 0.56 against the cached entry — below the
// 0.6 threshold — so the cache returns null and the model generates a new result.
const response2 = await model.invoke(
"List all Microsoft offices in India."
);
expect(response2.content).not.toEqual(response1.content);
console.log(response2.content);
// This prompt reportedly scores about 0.63 (>= 0.6), so the cached response is reused.
const response3 = await model.invoke("Tell me something about Microsoft");
expect(response3.content).toEqual(response1.content);
console.log(response3.content);
});
});