Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(cosmosdbnosql): Add Semantic Cache Integration #7033

Merged
merged 36 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
ea34aa7
add semantic cache
aditishree1 Oct 20, 2024
ceb00af
add test
aditishree1 Oct 20, 2024
0ee6a31
Merge branch 'langchain-ai:main' into add-semanticCache
aditishree1 Oct 21, 2024
d306cf9
format and fix lint errors
aditishree1 Oct 21, 2024
75f496b
fix example
aditishree1 Oct 21, 2024
72e790d
add integration tests
aditishree1 Oct 21, 2024
77f0b0b
set default db and container name
aditishree1 Oct 21, 2024
421a60c
fix int test
aditishree1 Oct 21, 2024
b753e8e
Merge branch 'langchain-ai:main' into add-semanticCache
aditishree1 Oct 22, 2024
2aa22b2
add semantic cache
aditishree1 Oct 20, 2024
1023ddb
add test
aditishree1 Oct 20, 2024
8482390
format and fix lint errors
aditishree1 Oct 21, 2024
7dc12d0
fix example
aditishree1 Oct 21, 2024
116624f
add integration tests
aditishree1 Oct 21, 2024
3914e11
set default db and container name
aditishree1 Oct 21, 2024
e3a0077
fix int test
aditishree1 Oct 21, 2024
fc7ead0
add docs and example
aditishree1 Oct 22, 2024
b21e09b
Merge branch 'add-semanticCache' of https://github.com/aditishree1/la…
aditishree1 Oct 22, 2024
1de4cb3
update doc
aditishree1 Oct 22, 2024
cfff31a
update link
aditishree1 Oct 22, 2024
5f3397b
Revert "update link"
aditishree1 Oct 22, 2024
2ef6d9a
update link
aditishree1 Oct 22, 2024
e0533e5
Update libs/langchain-azure-cosmosdb/src/caches.ts
aditishree1 Oct 24, 2024
dc35153
Update libs/langchain-azure-cosmosdb/src/caches.ts
aditishree1 Oct 24, 2024
75b9d6f
update user-agent
aditishree1 Oct 24, 2024
4800919
update user agent
aditishree1 Oct 24, 2024
369b199
fix docs
aditishree1 Oct 25, 2024
079eb03
add link in doc and remove defaultdb from cache
aditishree1 Oct 29, 2024
f41d737
Move documentation pages
jacoblee93 Oct 29, 2024
193fe00
Merge branch 'main' of github.com:langchain-ai/langchainjs into 7033
jacoblee93 Oct 29, 2024
f0a3f0c
add similarity Score threshold
aditishree1 Oct 30, 2024
1bec699
Update examples/src/caches/azure_cosmosdb_nosql/azure_cosmosdb_nosql.ts
aditishree1 Oct 30, 2024
2156145
update tests
aditishree1 Nov 1, 2024
26bccdf
Merge remote-tracking branch 'origin' into add-semanticCache
aditishree1 Nov 1, 2024
6c20e33
Format
jacoblee93 Nov 5, 2024
be89aba
Merge branch 'main' into add-semanticCache
jacoblee93 Nov 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion examples/src/caches/azure_cosmosdb_nosql/azure_cosmosdb_nosql.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,21 @@ const config: AzureCosmosDBNoSQLConfig = {
// use endpoint to initiate client with managed identity
connectionString: "<CONNECTION_STRING>",
};
const cache = new AzureCosmosDBNoSQLSemanticCache(embeddings, config);

/**
* Sets the threshold similarity score for returning cached results based on vector distance.
* Cached output is returned only if the similarity score meets or exceeds this threshold;
* otherwise, a new result is generated. Default is 0.6, adjustable via the constructor
* to suit various distance functions and use cases.
* (see: https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/query/vectordistance).
*/

const similarityScoreThreshold = 0.5;
const cache = new AzureCosmosDBNoSQLSemanticCache(
embeddings,
config,
similarityScoreThreshold
);

const model = new ChatOpenAI({ cache });

Expand Down
34 changes: 27 additions & 7 deletions libs/langchain-azure-cosmosdb/src/caches.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,16 @@ export class AzureCosmosDBNoSQLSemanticCache extends BaseCache {

private config: AzureCosmosDBNoSQLConfig;

private similarityScoreThreshold: number;

private cacheDict: { [key: string]: AzureCosmosDBNoSQLVectorStore } = {};

private vectorDistanceFunction: string;

constructor(
embeddings: EmbeddingsInterface,
dbConfig: AzureCosmosDBNoSQLConfig
dbConfig: AzureCosmosDBNoSQLConfig,
similarityScoreThreshold: number = 0.6
) {
super();
let client: CosmosClient;
Expand Down Expand Up @@ -88,13 +93,18 @@ export class AzureCosmosDBNoSQLSemanticCache extends BaseCache {
client = dbConfig.client;
}

this.vectorDistanceFunction =
dbConfig.vectorEmbeddingPolicy?.vectorEmbeddings[0].distanceFunction ??
"cosine";

this.config = {
...dbConfig,
client,
databaseName: dbConfig.databaseName,
containerName: dbConfig.containerName ?? DEFAULT_CONTAINER_NAME,
};
this.embeddings = embeddings;
this.similarityScoreThreshold = similarityScoreThreshold;
}

private getLlmCache(llmKey: string) {
Expand All @@ -118,14 +128,24 @@ export class AzureCosmosDBNoSQLSemanticCache extends BaseCache {
/**
 * Looks up a cached LLM result for a semantically similar prompt.
 *
 * Performs a top-1 vector similarity search against the cache container and
 * only returns the cached generations when the similarity score passes the
 * configured threshold; otherwise returns null so the caller generates fresh
 * output.
 *
 * @param prompt - The prompt to search the cache with.
 * @param llmKey - Key identifying the LLM whose cache container is queried.
 * @returns The deserialized cached generations, or null on a cache miss.
 */
public async lookup(prompt: string, llmKey: string) {
const llmCache = this.getLlmCache(llmKey);

// Fetch the single closest cached entry together with its similarity score.
const results = await llmCache.similaritySearchWithScore(prompt, 1);
if (!results.length) return null;

const generations = results
.flatMap(([document, score]) => {
// For the euclidean distance function a LOWER score means MORE similar,
// so the threshold is an upper bound; for cosine/dotproduct a HIGHER
// score means more similar, so the threshold is a lower bound.
const isSimilar =
(this.vectorDistanceFunction === "euclidean" &&
score <= this.similarityScoreThreshold) ||
(this.vectorDistanceFunction !== "euclidean" &&
score >= this.similarityScoreThreshold);

if (!isSimilar) return undefined;

// Each cached document stores its generations serialized as JSON strings.
return document.metadata.return_value.map((gen: string) =>
deserializeStoredGeneration(JSON.parse(gen))
);
})
.filter((gen) => gen !== undefined);

return generations.length > 0 ? generations : null;
}
Expand Down
157 changes: 113 additions & 44 deletions libs/langchain-azure-cosmosdb/src/tests/caches.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ import {
IndexingMode,
VectorEmbeddingPolicy,
} from "@azure/cosmos";
import { FakeEmbeddings, FakeLLM } from "@langchain/core/utils/testing";
import { DefaultAzureCredential } from "@azure/identity";
import { ChatOpenAI, OpenAIEmbeddings } from "@langchain/openai";
import { AzureCosmosDBNoSQLSemanticCache } from "../caches.js";

const DATABASE_NAME = "langchainTestCacheDB";
Expand All @@ -31,33 +31,42 @@ function vectorEmbeddingPolicy(
path: "/embedding",
dataType: "float32",
distanceFunction,
dimensions: 0,
dimensions: 505,
},
],
};
}

function initializeCache(
indexType: any,
distanceFunction: any
distanceFunction: any,
similarityThreshold?: number
): AzureCosmosDBNoSQLSemanticCache {
let cache: AzureCosmosDBNoSQLSemanticCache;
if (process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING) {
cache = new AzureCosmosDBNoSQLSemanticCache(new FakeEmbeddings(), {
databaseName: DATABASE_NAME,
containerName: CONTAINER_NAME,
connectionString: process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING,
indexingPolicy: indexingPolicy(indexType),
vectorEmbeddingPolicy: vectorEmbeddingPolicy(distanceFunction),
});
cache = new AzureCosmosDBNoSQLSemanticCache(
new OpenAIEmbeddings(),
{
databaseName: DATABASE_NAME,
containerName: CONTAINER_NAME,
connectionString: process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING,
indexingPolicy: indexingPolicy(indexType),
vectorEmbeddingPolicy: vectorEmbeddingPolicy(distanceFunction),
},
similarityThreshold
);
} else if (process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT) {
cache = new AzureCosmosDBNoSQLSemanticCache(new FakeEmbeddings(), {
databaseName: DATABASE_NAME,
containerName: CONTAINER_NAME,
endpoint: process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT,
indexingPolicy: indexingPolicy(indexType),
vectorEmbeddingPolicy: vectorEmbeddingPolicy(distanceFunction),
});
cache = new AzureCosmosDBNoSQLSemanticCache(
new OpenAIEmbeddings(),
{
databaseName: DATABASE_NAME,
containerName: CONTAINER_NAME,
endpoint: process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT,
indexingPolicy: indexingPolicy(indexType),
vectorEmbeddingPolicy: vectorEmbeddingPolicy(distanceFunction),
},
similarityThreshold
);
} else {
throw new Error(
"Please set the environment variable AZURE_COSMOSDB_NOSQL_CONNECTION_STRING or AZURE_COSMOSDB_NOSQL_ENDPOINT"
Expand All @@ -78,6 +87,10 @@ function initializeCache(
* Once you have the instance running, you need to set the following environment
* variables before running the test:
* - AZURE_COSMOSDB_NOSQL_CONNECTION_STRING or AZURE_COSMOSDB_NOSQL_ENDPOINT
* - AZURE_OPENAI_API_KEY
* - AZURE_OPENAI_API_INSTANCE_NAME
* - AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME
* - AZURE_OPENAI_API_VERSION
*/
describe("Azure CosmosDB NoSQL Semantic Cache", () => {
beforeEach(async () => {
Expand Down Expand Up @@ -105,58 +118,114 @@ describe("Azure CosmosDB NoSQL Semantic Cache", () => {
// Ignore error if the database does not exist
}
});

it("test AzureCosmosDBNoSqlSemanticCache with cosine quantizedFlat", async () => {
// Cache backed by a quantizedFlat index using the cosine distance function.
const cache = initializeCache("quantizedFlat", "cosine");
const model = new ChatOpenAI({ cache });
const llmString = JSON.stringify(model._identifyingParams);
await cache.update("foo", llmString, [{ text: "fizz" }]);

// An identical prompt must hit the cache.
let cacheOutput = await cache.lookup("foo", llmString);
expect(cacheOutput).toEqual([{ text: "fizz" }]);

// A dissimilar prompt must miss the cache.
cacheOutput = await cache.lookup("bar", llmString);
expect(cacheOutput).toEqual(null);

await cache.clear(llmString);
});

it("test AzureCosmosDBNoSqlSemanticCache with cosine flat", async () => {
// Cache backed by a flat index using the cosine distance function.
const cache = initializeCache("flat", "cosine");
const model = new ChatOpenAI({ cache });
const llmString = JSON.stringify(model._identifyingParams);
await cache.update("foo", llmString, [{ text: "fizz" }]);

// An identical prompt must hit the cache.
let cacheOutput = await cache.lookup("foo", llmString);
expect(cacheOutput).toEqual([{ text: "fizz" }]);

// A dissimilar prompt must miss the cache.
cacheOutput = await cache.lookup("bar", llmString);
expect(cacheOutput).toEqual(null);

await cache.clear(llmString);
});

it("test AzureCosmosDBNoSqlSemanticCache with dotProduct quantizedFlat", async () => {
// Cache backed by a quantizedFlat index using the dotproduct distance function.
const cache = initializeCache("quantizedFlat", "dotproduct");
const model = new ChatOpenAI({ cache });
const llmString = JSON.stringify(model._identifyingParams);
await cache.update("foo", llmString, [{ text: "fizz" }]);

// An identical prompt must hit the cache.
let cacheOutput = await cache.lookup("foo", llmString);
expect(cacheOutput).toEqual([{ text: "fizz" }]);

// A dissimilar prompt must miss the cache.
cacheOutput = await cache.lookup("bar", llmString);
expect(cacheOutput).toEqual(null);

await cache.clear(llmString);
});

it("test AzureCosmosDBNoSqlSemanticCache with dotProduct flat", async () => {
// BUG FIX: this test is named "dotProduct flat" but initialized the cache
// with the cosine distance function, duplicating the "cosine flat" test and
// leaving the dotproduct/flat combination untested.
const cache = initializeCache("flat", "dotproduct");
const model = new ChatOpenAI({ cache });
const llmString = JSON.stringify(model._identifyingParams);
await cache.update("foo", llmString, [{ text: "fizz" }]);

// An identical prompt must hit the cache.
let cacheOutput = await cache.lookup("foo", llmString);
expect(cacheOutput).toEqual([{ text: "fizz" }]);

// A dissimilar prompt must miss the cache.
cacheOutput = await cache.lookup("bar", llmString);
expect(cacheOutput).toEqual(null);

await cache.clear(llmString);
});

it("test AzureCosmosDBNoSqlSemanticCache with euclidean quantizedFlat", async () => {
// Cache backed by a quantizedFlat index using the euclidean distance function
// (lower score = more similar; lookup treats the threshold as an upper bound).
const cache = initializeCache("quantizedFlat", "euclidean");
const model = new ChatOpenAI({ cache });
const llmString = JSON.stringify(model._identifyingParams);
await cache.update("foo", llmString, [{ text: "fizz" }]);

// An identical prompt must hit the cache.
let cacheOutput = await cache.lookup("foo", llmString);
expect(cacheOutput).toEqual([{ text: "fizz" }]);

// A dissimilar prompt must miss the cache.
cacheOutput = await cache.lookup("bar", llmString);
expect(cacheOutput).toEqual(null);

await cache.clear(llmString);
});

it("test AzureCosmosDBNoSqlSemanticCache with euclidean flat", async () => {
// Cache backed by a flat index using the euclidean distance function
// (lower score = more similar; lookup treats the threshold as an upper bound).
const cache = initializeCache("flat", "euclidean");
const model = new ChatOpenAI({ cache });
const llmString = JSON.stringify(model._identifyingParams);
await cache.update("foo", llmString, [{ text: "fizz" }]);

// An identical prompt must hit the cache.
let cacheOutput = await cache.lookup("foo", llmString);
expect(cacheOutput).toEqual([{ text: "fizz" }]);

// A dissimilar prompt must miss the cache.
cacheOutput = await cache.lookup("bar", llmString);
expect(cacheOutput).toEqual(null);

await cache.clear(llmString);
});

it("test AzureCosmosDBNoSqlSemanticCache response according to similarity score", async () => {
// Uses the default similarity threshold (0.6) with the cosine distance function.
const cache = initializeCache("quantizedFlat", "cosine");
const model = new ChatOpenAI({ cache });
const response1 = await model.invoke(
"Where is the headquarter of Microsoft?"
);
console.log(response1.content);
// This prompt reportedly scores about 0.56 against the cached entry — below the
// 0.6 threshold — so the cache returns null and the model generates a new result.
const response2 = await model.invoke(
"List all Microsoft offices in India."
);
expect(response2.content).not.toEqual(response1.content);
console.log(response2.content);
// This prompt reportedly scores about 0.63 (>= 0.6), so the cached response is reused.
const response3 = await model.invoke("Tell me something about Microsoft");
expect(response3.content).toEqual(response1.content);
console.log(response3.content);
});
});