Skip to content

Commit

Permalink
langchain[patch]: Adds MMR to memory vector store (#6481)
Browse files Browse the repository at this point in the history
* Adds MMR to memory vector store

* Adds memory MMR docs
  • Loading branch information
jacoblee93 authored Aug 9, 2024
1 parent 8640a66 commit 8b0ac12
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 20 deletions.
55 changes: 52 additions & 3 deletions docs/core_docs/docs/integrations/vectorstores/memory.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"id": "aa0a16fa",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -199,7 +199,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "5efd2eaa",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -232,7 +232,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"id": "f3460093",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -265,6 +265,55 @@
"await retriever.invoke(\"biology\");"
]
},
{
"cell_type": "markdown",
"id": "423d779a",
"metadata": {},
"source": [
"### Maximal marginal relevance\n",
"\n",
"This vector store also supports maximal marginal relevance (MMR), a technique that first fetches a larger number of results (given by `searchKwargs.fetchK`), with classic similarity search, then reranks for diversity and returns the top `k` results. This helps guard against redundant information:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "56817a1c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[\n",
" Document {\n",
" pageContent: 'The powerhouse of the cell is the mitochondria',\n",
" metadata: { source: 'https://example.com' },\n",
" id: undefined\n",
" },\n",
" Document {\n",
" pageContent: 'Buildings are made out of brick',\n",
" metadata: { source: 'https://example.com' },\n",
" id: undefined\n",
" }\n",
"]\n"
]
}
],
"source": [
"const mmrRetriever = vectorStore.asRetriever({\n",
" searchType: \"mmr\",\n",
" searchKwargs: {\n",
" fetchK: 10,\n",
" },\n",
" // Optional filter\n",
" filter: filter,\n",
" k: 2,\n",
"});\n",
"\n",
"await mmrRetriever.invoke(\"biology\");"
]
},
{
"cell_type": "markdown",
"id": "e2e0a211",
Expand Down
78 changes: 61 additions & 17 deletions langchain/src/vectorstores/memory.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import { VectorStore } from "@langchain/core/vectorstores";
import {
MaxMarginalRelevanceSearchOptions,
VectorStore,
} from "@langchain/core/vectorstores";
import type { EmbeddingsInterface } from "@langchain/core/embeddings";
import { Document } from "@langchain/core/documents";
import { Document, DocumentInterface } from "@langchain/core/documents";
import { cosine } from "../util/ml-distance/similarities.js";
import { maximalMarginalRelevance } from "../util/math.js";

/**
* Interface representing a vector in memory. It includes the content
Expand Down Expand Up @@ -82,21 +86,11 @@ export class MemoryVectorStore extends VectorStore {
this.memoryVectors = this.memoryVectors.concat(memoryVectors);
}

/**
* Method to perform a similarity search in the memory vector store. It
* calculates the similarity between the query vector and each vector in
* the store, sorts the results by similarity, and returns the top `k`
* results along with their scores.
* @param query Query vector to compare against the vectors in the store.
* @param k Number of top results to return.
* @param filter Optional filter function to apply to the vectors before performing the search.
* @returns Promise that resolves with an array of tuples, each containing a `Document` and its similarity score.
*/
async similaritySearchVectorWithScore(
/**
* Scores stored vectors against `query` using `this.similarity`, orders the
* scored entries and returns at most `k` of them. Each returned entry
* carries `similarity`, `index` (position within the filtered vectors),
* `metadata`, `content` and `embedding`.
* @param query Query vector to compare against the vectors in the store.
* @param k Maximum number of entries to return.
* @param filter Optional filter; when it is omitted every stored vector is kept.
*/
// NOTE(review): this span interleaves both sides of a diff hunk and has
// collapsed lines in the middle; the comments describe only what is visible.
protected async _queryVectors(
query: number[],
k: number,
filter?: this["FilterType"]
): Promise<[Document, number][]> {
) {
const filterFunction = (memoryVector: MemoryVector) => {
if (!filter) {
return true;
Expand All @@ -109,25 +103,75 @@ export class MemoryVectorStore extends VectorStore {
return filter(doc);
};
const filteredMemoryVectors = this.memoryVectors.filter(filterFunction);
const searches = filteredMemoryVectors
return filteredMemoryVectors
.map((vector, index) => ({
similarity: this.similarity(query, vector.embedding),
index,
metadata: vector.metadata,
content: vector.content,
embedding: vector.embedding,
}))
// NOTE(review): this comparator only ever returns -1 or 0 (never a positive
// value), so it is not a consistent ordering function for Array.prototype.sort.
// `b.similarity - a.similarity` would be the conventional descending
// comparator; confirm and fix separately.
.sort((a, b) => (a.similarity > b.similarity ? -1 : 0))
.slice(0, k);
}

/**
* Method to perform a similarity search in the memory vector store. It
* delegates to `_queryVectors`, which scores the stored vectors against
* the query vector and keeps at most `k` of them, then wraps each hit in
* a `Document` paired with its similarity score.
* @param query Query vector to compare against the vectors in the store.
* @param k Number of top results to return.
* @param filter Optional filter function to apply to the vectors before performing the search.
* @returns Promise that resolves with an array of tuples, each containing a `Document` and its similarity score.
*/
async similaritySearchVectorWithScore(
query: number[],
k: number,
filter?: this["FilterType"]
): Promise<[Document, number][]> {
const searches = await this._queryVectors(query, k, filter);
const result: [Document, number][] = searches.map((search) => [
new Document({
// NOTE(review): `metadata` and `pageContent` are each listed twice below and
// `filteredMemoryVectors` is not declared in this method; presumably the
// first pair is the pre-change side of the diff. Confirm before reuse.
metadata: filteredMemoryVectors[search.index].metadata,
pageContent: filteredMemoryVectors[search.index].content,
metadata: search.metadata,
pageContent: search.content,
}),
search.similarity,
]);

return result;
}

/**
 * Method to perform a maximal marginal relevance (MMR) search for a text
 * query. The query is embedded with the store's embeddings, up to
 * `options.fetchK` candidates (20 when unset) are fetched through
 * `_queryVectors`, and `maximalMarginalRelevance` then picks which of
 * those candidates are returned.
 * @param query Text to embed and search for.
 * @param options MMR settings; `k`, `fetchK`, `lambda` and `filter` are read from it.
 * @returns Promise that resolves with one `Document` per index chosen by `maximalMarginalRelevance`, in the order the indexes were returned.
 */
async maxMarginalRelevanceSearch(
  query: string,
  options: MaxMarginalRelevanceSearchOptions<this["FilterType"]>
): Promise<DocumentInterface[]> {
  const embeddedQuery = await this.embeddings.embedQuery(query);

  // Fetch the candidate pool first; the MMR step below selects from it.
  const candidateLimit = options.fetchK ?? 20;
  const candidates = await this._queryVectors(
    embeddedQuery,
    candidateLimit,
    options.filter
  );

  const candidateEmbeddings = candidates.map(({ embedding }) => embedding);
  const chosenIndexes = maximalMarginalRelevance(
    embeddedQuery,
    candidateEmbeddings,
    options.lambda,
    options.k
  );

  // Each chosen index is a position within `candidates`.
  const documents: DocumentInterface[] = [];
  for (const position of chosenIndexes) {
    const { metadata, content } = candidates[position];
    documents.push(new Document({ metadata, pageContent: content }));
  }
  return documents;
}

/**
* Static method to create a `MemoryVectorStore` instance from an array of
* texts. It creates a `Document` for each text and metadata pair, and
Expand Down
30 changes: 30 additions & 0 deletions langchain/src/vectorstores/tests/memory.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,33 @@ test("MemoryVectorStore with custom similarity", async () => {
expect(similarityCalledCount).toBe(4);
expect(results).toHaveLength(3);
});

test("MemoryVectorStore with max marginal relevance", async () => {
  // Track every invocation of the custom similarity function so the test can
  // verify that the MMR path scores each stored vector through it.
  let similarityCalled = false;
  let similarityCalledCount = 0;
  const countingCosine = (a: number[], b: number[]) => {
    similarityCalledCount += 1;
    similarityCalled = true;
    return cosine(a, b);
  };

  const store = new MemoryVectorStore(
    new SyntheticEmbeddings({ vectorSize: 1536 }),
    { similarity: countingCosine }
  );
  expect(store).toBeDefined();

  // Four documents sharing the same metadata.
  const documents = ["hello", "hi", "bye", "what's this"].map(
    (pageContent) => ({ pageContent, metadata: { a: 1 } })
  );
  await store.addDocuments(documents);

  const results = await store.maxMarginalRelevanceSearch("hello", { k: 3 });

  // One similarity call per stored document, and exactly `k` results back.
  expect(similarityCalled).toBe(true);
  expect(similarityCalledCount).toBe(4);
  expect(results).toHaveLength(3);
});

0 comments on commit 8b0ac12

Please sign in to comment.