-
Notifications
You must be signed in to change notification settings - Fork 2.3k
/
Copy pathvectara.int.test.ts
289 lines (262 loc) · 9.49 KB
/
vectara.int.test.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
/* eslint-disable @typescript-eslint/no-unused-vars */
/* eslint-disable no-process-env */
import fs from "fs";
import { expect, beforeAll } from "@jest/globals";
import { insecureHash } from "@langchain/core/utils/hash";
import { Document } from "@langchain/core/documents";
import { FakeEmbeddings } from "@langchain/core/utils/testing";
import {
VectaraFile,
VectaraLibArgs,
VectaraStore,
VectaraSummary,
} from "../vectara.js";
import { VectaraSummaryRetriever } from "../../retrievers/vectara_summary.js";
/**
 * Builds the fixture documents indexed by the integration tests below.
 *
 * Two English passages are tagged as "Lord of the Rings" and one French
 * passage as "The hitchhiker's guide to the galaxy". Every document gets a
 * `document_id` derived from hashing its own text.
 *
 * @returns Three documents: two with `lang: "eng"` and one with `lang: "fra"`.
 */
const getDocs = (): Document[] => {
  // English fixture text, tagged as "Lord of the Rings" in the metadata below.
  const englishOne = `It all depends on what you want. You can trust us to stick to you through thick and thin to the
bitter end. And you can trust us to keep any secret of yours - closer than you keep it yourself.
But you cannot trust us to let you face trouble alone, and go off without a word. We are your
friends, Frodo. Anyway: there it is. We know most of what Gandalf has told you. We know a good
deal about the Ring. We are horribly afraid - but we are coming with you; or following you
like hounds.`;
  const englishTwo = `Sam lay back, and stared with open mouth, and for a moment, between bewilderment and great joy,
he could not answer. At last he gasped: “Gandalf! I thought you were dead! But then I thought I
was dead myself. Is everything sad going to come untrue? What's happened to the world?`;
  // French fixture text, tagged as "The hitchhiker's guide to the galaxy" below.
  const frenchOne = `Par exemple, sur la planète Terre, l'homme a toujours supposé qu'il était plus intelligent que les dauphins
parce qu'il avait accompli tant de choses - la roue, New York, les guerres, etc. passer du
bon temps. Mais à l'inverse, les dauphins ont toujours cru qu'ils étaient bien plus
intelligents que l'homme, pour les mêmes raisons précisément.`;

  // Wraps a passage in a Document whose id is a hashcode of the passage text.
  const toDocument = (
    pageContent: string,
    title: string,
    author: string,
    lang: string
  ): Document =>
    new Document({
      pageContent,
      metadata: {
        document_id: insecureHash(pageContent),
        title,
        author,
        genre: "fiction",
        lang,
      },
    });

  return [
    toDocument(englishOne, "Lord of the Rings", "Tolkien", "eng"),
    toDocument(englishTwo, "Lord of the Rings", "Tolkien", "eng"),
    toDocument(
      frenchOne,
      "The hitchhiker's guide to the galaxy",
      "Douglas Adams",
      "fra"
    ),
  ];
};
/**
 * Parses a comma-separated list of corpus IDs (the VECTARA_CORPUS_ID format).
 *
 * @param raw - Raw environment value, e.g. "12" or "12,34".
 * @returns The parsed numeric IDs, or `[0]` when `raw` is unset/empty or when
 *   any entry is not a number.
 */
const parseCorpusIds = (raw: string | undefined): number[] => {
  if (!raw) return [0];
  const ids = raw.split(",").map((id) => Number(id));
  // Reject the whole list if any entry is invalid, so NaN never reaches the
  // store configuration.
  return ids.some((id) => Number.isNaN(id)) ? [0] : ids;
};

// Corpus IDs shared by every store constructed in this file.
const corpusId: number[] = parseCorpusIds(process.env.VECTARA_CORPUS_ID);
describe("VectaraStore", () => {
// Fail fast while the suite is being defined if any Vectara setting is missing.
const requiredEnvVars = [
  "VECTARA_CUSTOMER_ID",
  "VECTARA_CORPUS_ID",
  "VECTARA_API_KEY",
];
for (const envVar of requiredEnvVars) {
  const value = process.env[envVar];
  // Unset and empty-string values are both treated as missing.
  if (!value) {
    throw new Error(`${envVar} not set`);
  }
}
describe("fromTexts", () => {
  // Connection settings read from the environment; a non-numeric customer id
  // or a missing API key falls back to 0 / "".
  const args: VectaraLibArgs = {
    customerId: Number(process.env.VECTARA_CUSTOMER_ID) || 0,
    corpusId,
    apiKey: process.env.VECTARA_API_KEY || "",
  };

  // NOTE(review): assumes VectaraStore.fromTexts returns a Promise, as the
  // fromDocuments test in this file already does for its sibling factory.
  // Awaiting it means a rejection fails this test instead of being dropped as
  // a floating promise; a synchronous throw still fails the test as before.
  test.skip("with fakeEmbeddings doesn't throw error", async () => {
    await expect(
      VectaraStore.fromTexts([], [], new FakeEmbeddings(), args)
    ).resolves.toBeDefined();
  });
});
describe("fromDocuments", () => {
  // Connection settings read from the environment, with 0 / "" as fallbacks.
  const customerId = Number(process.env.VECTARA_CUSTOMER_ID) || 0;
  const apiKey = process.env.VECTARA_API_KEY || "";
  const args: VectaraLibArgs = { customerId, corpusId, apiKey };

  // Building a store from the fixture documents should resolve to a value.
  test.skip("with fakeEmbeddings doesn't throw error", async () => {
    const pendingStore = VectaraStore.fromDocuments(
      getDocs(),
      new FakeEmbeddings(),
      args
    );
    await expect(pendingStore).resolves.toBeDefined();
  });
});
describe("access operations", () => {
  let store: VectaraStore;
  // IDs of everything this suite indexes, collected so afterAll can delete it.
  let docIds: string[] = [];

  // Index the fixture documents once before any test in this suite runs.
  beforeAll(async () => {
    store = new VectaraStore({
      customerId: Number(process.env.VECTARA_CUSTOMER_ID) || 0,
      corpusId,
      apiKey: process.env.VECTARA_API_KEY || "",
    });
    docIds = await store.addDocuments(getDocs());
  });

  test.skip("similaritySearchWithScore", async () => {
    const resultsWithScore = await store.similaritySearchWithScore(
      "What did Sam do?",
      10, // Number of results needed
      { lambda: 0.025 }
    );
    expect(resultsWithScore.length).toBeGreaterThan(0);
    // Each result is a [document, score] pair.
    expect(resultsWithScore[0][0].pageContent.length).toBeGreaterThan(0);
    expect(resultsWithScore[0][0].metadata.title).toBe("Lord of the Rings");
    expect(resultsWithScore[0][1]).toBeGreaterThan(0);
  });

  test.skip("similaritySearch", async () => {
    const results = await store.similaritySearch(
      "Was Gandalf dead?",
      10, // Number of results needed
      {
        lambda: 0.025,
        contextConfig: {
          sentencesAfter: 1,
          sentencesBefore: 1,
        },
      }
    );
    expect(results.length).toBeGreaterThan(0);
    expect(results[0].pageContent.length).toBeGreaterThan(0);
    expect(results[0].metadata.title).toBe("Lord of the Rings");
  });

  test.skip("similaritySearch with filter", async () => {
    const results = await store.similaritySearch(
      "Was Gandalf dead?",
      10, // Number of results needed
      { filter: "part.lang = 'fra'", lambda: 0.025 } // Filter on the language of the document
    );
    expect(results.length).toBeGreaterThan(0);
    expect(results[0].pageContent.length).toBeGreaterThan(0);
    // Query filtered on French, so we expect only French results
    const hasEnglish = results.some((result) => result.metadata.lang === "eng");
    expect(hasEnglish).toBe(false);
  });

  test.skip("similaritySearch with contextConfig", async () => {
    const results = await store.similaritySearch(
      "Was Gandalf dead?",
      10, // Number of results needed
      {
        lambda: 0.025,
        contextConfig: {
          charsBefore: 30,
          charsAfter: 30,
          sentencesBefore: 3,
          sentencesAfter: 3,
          startTag: "<b>",
          endTag: "</b>",
        },
      }
    );
    expect(results.length).toBeGreaterThan(0);
    expect(results[0].pageContent.length).toBeGreaterThan(0);
  });

  test.skip("similaritySearch with MMR", async () => {
    const results = await store.similaritySearch(
      "Was Gandalf dead?",
      10, // Number of results needed
      {
        lambda: 0.025,
        mmrConfig: {
          diversityBias: 1.0,
        },
      }
    );
    expect(results.length).toBeGreaterThan(0);
    expect(results[0].pageContent.length).toBeGreaterThan(0);
  });

  test.skip("RAG retrieval with generative summarization", async () => {
    const summaryConfig: VectaraSummary = {
      enabled: true,
      summarizerPromptName: "vectara-summary-ext-v1.2.0",
      maxSummarizedResults: 3,
      responseLang: "ita",
    };
    const topK = 3;
    const retriever = new VectaraSummaryRetriever({
      vectara: store,
      topK,
      summaryConfig,
      filter: {
        lambda: 0.025,
      },
    });
    const result = await retriever.getRelevantDocuments("Was Gandalf dead?");
    expect(result.length).toBeGreaterThan(0);
    expect(result.length).toBe(topK + 1); // +1 for the summary
    expect(result[0].pageContent.length).toBeGreaterThan(0);
  });

  test.skip("addFiles", async () => {
    const docs = getDocs();
    const englishOneContent = docs[0].pageContent;
    const frenchOneContent = docs[2].pageContent;
    const files = [
      { filename: "englishOne.txt", content: englishOneContent },
      { filename: "frenchOne.txt", content: frenchOneContent },
    ];
    const vectaraFiles: VectaraFile[] = [];
    try {
      // Round-trip each text through a real file on disk before wrapping it
      // in a Blob for upload.
      for (const file of files) {
        fs.writeFileSync(file.filename, file.content);
        const buffer = fs.readFileSync(file.filename);
        vectaraFiles.push({
          blob: new Blob([buffer], { type: "text/plain" }),
          fileName: file.filename,
        });
      }
    } finally {
      // The Blobs already hold the bytes, so the temp files can go right away.
      // Doing it in `finally` (with `force` so a file that was never written
      // is not an error) keeps the working directory clean on failure too.
      for (const file of files) {
        fs.rmSync(file.filename, { force: true });
      }
    }
    // Path is relative to the process working directory.
    const bitcoinBuffer = fs.readFileSync(
      "../examples/src/document_loaders/example_data/bitcoin.pdf"
    );
    vectaraFiles.push({
      blob: new Blob([bitcoinBuffer], { type: "application/pdf" }),
      fileName: "bitcoin.pdf",
    });
    const fileDocIds = await store.addFiles(vectaraFiles);
    // Track the uploaded files so afterAll deletes them as well.
    docIds = [...docIds, ...fileDocIds];
    expect(fileDocIds.length).toEqual(3);
    const searchResults = await store.similaritySearch("What is bitcoin");
    expect(searchResults.length).toBeGreaterThan(0);
    expect(searchResults[0].pageContent).toContain(
      "A Peer-to-Peer Electronic Cash System"
    );
  });

  // delete documents added in the test
  afterAll(async () => {
    // A fresh store is constructed here rather than reusing the one assigned
    // in beforeAll.
    store = new VectaraStore({
      customerId: Number(process.env.VECTARA_CUSTOMER_ID) || 0,
      corpusId,
      apiKey: process.env.VECTARA_API_KEY || "",
    });
    await store.deleteDocuments(docIds);
  });
});
});