Skip to content

Commit

Permalink
Merge pull request #151 from llm-tools/loaders
Browse files (browse the repository at this point in the history)
Introduce stores (replaces caches)
  • Loading branch information
adhityan authored Nov 1, 2024
2 parents 081163b + e5faa65 commit 2de6d3b
Show file tree
Hide file tree
Showing 57 changed files with 1,676 additions and 954 deletions.
4 changes: 2 additions & 2 deletions core/embedjs-interfaces/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
"version": "0.1.13",
"description": "Interfaces for extending the embedjs ecosystem",
"dependencies": {
"@langchain/core": "^0.3.15",
"@langchain/core": "^0.3.16",
"debug": "^4.3.7",
"md5": "^2.3.0",
"uuid": "^10.0.0"
"uuid": "^11.0.2"
},
"type": "module",
"main": "./src/index.js",
Expand Down
6 changes: 3 additions & 3 deletions core/embedjs-interfaces/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { BaseLoader } from './interfaces/base-loader.js';
import { BaseDb } from './interfaces/base-db.js';
import { BaseVectorDatabase } from './interfaces/base-vector-database.js';
import { BaseEmbeddings } from './interfaces/base-embeddings.js';
import { BaseCache } from './interfaces/base-cache.js';
import { BaseStore } from './interfaces/base-store.js';
import { BaseModel } from './interfaces/base-model.js';

export * from './types.js';
export * from './constants.js';
export { BaseDb, BaseCache, BaseLoader, BaseEmbeddings, BaseModel };
export { BaseStore, BaseVectorDatabase, BaseLoader, BaseEmbeddings, BaseModel };
21 changes: 0 additions & 21 deletions core/embedjs-interfaces/src/interfaces/base-cache.ts

This file was deleted.

107 changes: 36 additions & 71 deletions core/embedjs-interfaces/src/interfaces/base-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,72 +2,24 @@ import md5 from 'md5';
import createDebugMessages from 'debug';
import { EventEmitter } from 'node:events';

import { BaseCache } from './base-cache.js';
import { LoaderList, LoaderChunk, UnfilteredLoaderChunk } from '../types.js';
import { BaseStore } from './base-store.js';
import { LoaderChunk, UnfilteredLoaderChunk } from '../types.js';

export abstract class BaseLoader<
MetadataTemplate extends Record<string, string | number | boolean> = Record<string, string | number | boolean>,
CacheTemplate extends Record<string, unknown> = Record<string, unknown>,
> extends EventEmitter {
private static cache: Pick<
BaseCache,
'loaderCustomDelete' | 'loaderCustomGet' | 'loaderCustomHas' | 'loaderCustomSet'
>;
private static readonly LOADERS_LIST_CACHE_KEY = 'LOADERS_LIST_CACHE_KEY';

public static setCache(cache: BaseCache) {
BaseLoader.cache = cache;
}

private static async recordLoaderInCache(
loaderName: string,
uniqueId: string,
loaderMetadata: Record<string, unknown>,
) {
if (!BaseLoader.cache) return;

if (await BaseLoader.cache.loaderCustomHas(BaseLoader.LOADERS_LIST_CACHE_KEY)) {
const current = await BaseLoader.cache.loaderCustomGet<{ list: LoaderList }>(
BaseLoader.LOADERS_LIST_CACHE_KEY,
);

current.list.push({
type: loaderName,
uniqueId,
loaderMetadata,
});

current.list = [...new Map(current.list.map((item) => [item.uniqueId, item])).values()];
BaseLoader.cache.loaderCustomSet(BaseLoader.LOADERS_LIST_CACHE_KEY, current);
} else {
BaseLoader.cache.loaderCustomSet<{ list: LoaderList }>(BaseLoader.LOADERS_LIST_CACHE_KEY, {
list: [
{
type: loaderName,
uniqueId,
loaderMetadata,
},
],
});
}
}

public static async getLoadersList() {
if (!BaseLoader.cache) return null;
private static store: BaseStore;

if (await BaseLoader.cache.loaderCustomHas(BaseLoader.LOADERS_LIST_CACHE_KEY)) {
const current = await BaseLoader.cache.loaderCustomGet<{ list: LoaderList }>(
BaseLoader.LOADERS_LIST_CACHE_KEY,
);

return current.list;
} else return <LoaderList>[];
public static setCache(store: BaseStore) {
BaseLoader.store = store;
}

protected readonly uniqueId: string;
private readonly _canIncrementallyLoad: boolean;
protected readonly chunkOverlap: number;
protected readonly chunkSize: number;
protected readonly chunkOverlap: number;
public readonly canIncrementallyLoad: boolean;
protected readonly loaderMetadata: Record<string, unknown>;

constructor(
uniqueId: string,
Expand All @@ -79,47 +31,56 @@ export abstract class BaseLoader<
super();

this.uniqueId = uniqueId;
this._canIncrementallyLoad = canIncrementallyLoad;
this.chunkOverlap = chunkOverlap;
this.chunkSize = chunkSize;
this.chunkOverlap = chunkOverlap;
this.loaderMetadata = loaderMetadata;
this.canIncrementallyLoad = canIncrementallyLoad;

BaseLoader.recordLoaderInCache(this.constructor.name, uniqueId, loaderMetadata);
createDebugMessages('embedjs:loader:BaseLoader')(`New loader class initalized with key ${uniqueId}`);
}

// eslint-disable-next-line @typescript-eslint/no-empty-function
public async init(): Promise<void> {}

public get canIncrementallyLoad() {
return this._canIncrementallyLoad;
}

public getUniqueId(): string {
return this.uniqueId;
}

private async recordLoaderInCache(chunksProcessed: number) {
if (!BaseLoader.store) return;

const loaderData = {
uniqueId: this.uniqueId,
type: this.constructor.name,
loaderMetadata: this.loaderMetadata,
chunksProcessed,
};

await BaseLoader.store.addLoaderMetadata(this.uniqueId, loaderData);
}

private getCustomCacheKey(key: string) {
return `LOADER_CUSTOM_${this.uniqueId}_${key}`;
}

protected async checkInCache(key: string) {
if (!BaseLoader.cache) return false;
return BaseLoader.cache.loaderCustomHas(this.getCustomCacheKey(key));
if (!BaseLoader.store) return false;
return BaseLoader.store.loaderCustomHas(this.getCustomCacheKey(key));
}

protected async getFromCache(key: string): Promise<CacheTemplate> {
if (!BaseLoader.cache) return null;
return BaseLoader.cache.loaderCustomGet(this.getCustomCacheKey(key));
if (!BaseLoader.store) return null;
return BaseLoader.store.loaderCustomGet(this.getCustomCacheKey(key));
}

protected async saveToCache(key: string, value: CacheTemplate) {
if (!BaseLoader.cache) return;
await BaseLoader.cache.loaderCustomSet(this.getCustomCacheKey(key), value);
if (!BaseLoader.store) return;
await BaseLoader.store.loaderCustomSet(this.uniqueId, this.getCustomCacheKey(key), value);
}

protected async deleteFromCache(key: string) {
if (!BaseLoader.cache) return false;
return BaseLoader.cache.loaderCustomDelete(this.getCustomCacheKey(key));
if (!BaseLoader.store) return false;
return BaseLoader.store.loaderCustomDelete(this.getCustomCacheKey(key));
}

protected async loadIncrementalChunk(
Expand All @@ -135,6 +96,7 @@ export abstract class BaseLoader<
public async *getChunks(): AsyncGenerator<LoaderChunk<MetadataTemplate>, void, void> {
const chunks = await this.getUnfilteredChunks();

let count = 0;
for await (const chunk of chunks) {
chunk.pageContent = chunk.pageContent
.replace(/(\r\n|\n|\r)/gm, ' ')
Expand All @@ -146,8 +108,11 @@ export abstract class BaseLoader<
...chunk,
contentHash: md5(chunk.pageContent),
};
count++;
}
}

await this.recordLoaderInCache(count);
}

abstract getUnfilteredChunks(): AsyncGenerator<UnfilteredLoaderChunk<MetadataTemplate>, void, void>;
Expand Down
18 changes: 9 additions & 9 deletions core/embedjs-interfaces/src/interfaces/base-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@ import createDebugMessages from 'debug';
import { v4 as uuidv4 } from 'uuid';

import { Chunk, QueryResponse, Message, SourceDetail, ModelResponse, Conversation } from '../types.js';
import { BaseCache } from './base-cache.js';
import { BaseStore } from './base-store.js';

export abstract class BaseModel {
private readonly baseDebug = createDebugMessages('embedjs:model:BaseModel');

private static cache: BaseCache;
private static store: BaseStore;
private static defaultTemperature: number;

public static setDefaultTemperature(temperature?: number) {
BaseModel.defaultTemperature = temperature;
}

public static setCache(cache: BaseCache) {
BaseModel.cache = cache;
public static setStore(cache: BaseStore) {
BaseModel.store = cache;
}

private readonly _temperature?: number;
Expand Down Expand Up @@ -83,18 +83,18 @@ export abstract class BaseModel {
let conversation: Conversation;

if (conversationId) {
if (!(await BaseModel.cache.hasConversation(conversationId))) {
if (!(await BaseModel.store.hasConversation(conversationId))) {
this.baseDebug(`Conversation with id '${conversationId}' is new`);
await BaseModel.cache.addConversation(conversationId);
await BaseModel.store.addConversation(conversationId);
}

conversation = await BaseModel.cache.getConversation(conversationId);
conversation = await BaseModel.store.getConversation(conversationId);
this.baseDebug(
`${conversation.entries.length} history entries found for conversationId '${conversationId}'`,
);

// Add user query to history
await BaseModel.cache.addEntryToConversation(conversationId, {
await BaseModel.store.addEntryToConversation(conversationId, {
id: uuidv4(),
timestamp: new Date(),
actor: 'HUMAN',
Expand Down Expand Up @@ -123,7 +123,7 @@ export abstract class BaseModel {

if (conversationId) {
// Add AI response to history
await BaseModel.cache.addEntryToConversation(conversationId, newEntry);
await BaseModel.store.addEntryToConversation(conversationId, newEntry);
}

return {
Expand Down
23 changes: 23 additions & 0 deletions core/embedjs-interfaces/src/interfaces/base-store.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import { Conversation, LoaderListEntry, Message } from '../types.js';

/**
 * Persistence contract shared by loaders and models. It groups three kinds of
 * data: per-loader metadata entries, arbitrary per-loader custom key/value
 * data, and conversation history.
 *
 * Every method is asynchronous. Behaviour for missing ids/keys (throw vs.
 * resolve with an empty value) is not fixed by these signatures —
 * NOTE(review): confirm against the concrete store implementations.
 */
export interface BaseStore {
/**
 * Prepares the store for use.
 * NOTE(review): who calls this, and whether it may be called more than once,
 * is not visible from this interface — confirm with the caller.
 */
init(): Promise<void>;

/**
 * Records the metadata entry for a loader.
 * BaseLoader calls this once its chunk generator has been fully consumed,
 * passing its own unique id and an entry holding `uniqueId`, `type`,
 * `loaderMetadata` and `chunksProcessed`.
 * NOTE(review): presumably replaces any existing entry for the same id — confirm.
 *
 * @param loaderId - Unique id of the loader the entry belongs to.
 * @param value - The entry to store.
 */
addLoaderMetadata(loaderId: string, value: LoaderListEntry): Promise<void>;
/**
 * Fetches the stored metadata entry for a loader.
 *
 * @param loaderId - Unique id of the loader.
 * @returns The stored entry.
 */
getLoaderMetadata(loaderId: string): Promise<LoaderListEntry>;
/**
 * Reports whether a metadata entry is stored for the given loader id.
 *
 * @param loaderId - Unique id of the loader.
 */
hasLoaderMetadata(loaderId: string): Promise<boolean>;
/**
 * Fetches the metadata entries of all loaders known to the store.
 */
getAllLoaderMetadata(): Promise<LoaderListEntry[]>;

/**
 * Stores a custom value for a loader under `key`.
 * Unlike the get/has/delete counterparts, this method also receives the
 * owning loader's id. BaseLoader passes its unique id together with a key of
 * the form `LOADER_CUSTOM_<uniqueId>_<key>`.
 * NOTE(review): the loader id presumably lets
 * `deleteLoaderMetadataAndCustomValues` find these values later — confirm.
 *
 * @param loaderId - Unique id of the loader that owns the value.
 * @param key - Key the value is stored under.
 * @param value - The value to store.
 */
loaderCustomSet<T extends Record<string, unknown>>(loaderId: string, key: string, value: T): Promise<void>;
/**
 * Fetches the custom value stored under `key`.
 *
 * @param key - Key previously used with `loaderCustomSet`.
 * @returns The stored value, typed as `T` by the caller (the signature alone
 * does not validate the shape).
 */
loaderCustomGet<T extends Record<string, unknown>>(key: string): Promise<T>;
/**
 * Reports whether a custom value is stored under `key`.
 *
 * @param key - Key to look up.
 */
loaderCustomHas(key: string): Promise<boolean>;
/**
 * Removes the custom value stored under `key`.
 * Resolves with no value, so callers cannot tell from the result whether
 * anything was actually removed.
 *
 * @param key - Key to remove.
 */
loaderCustomDelete(key: string): Promise<void>;
/**
 * Removes a loader's metadata entry together with its custom values.
 * NOTE(review): exact scope is implied by the method name only — confirm in
 * the implementations.
 *
 * @param loaderId - Unique id of the loader to purge.
 */
deleteLoaderMetadataAndCustomValues(loaderId: string): Promise<void>;

/**
 * Creates a conversation with the given id.
 * BaseModel calls this only after `hasConversation` has resolved to false
 * for the same id.
 *
 * @param conversationId - Id of the conversation to create.
 */
addConversation(conversationId: string): Promise<void>;
/**
 * Fetches a conversation. BaseModel reads `entries` from the result to
 * obtain the stored history.
 *
 * @param conversationId - Id of the conversation.
 */
getConversation(conversationId: string): Promise<Conversation>;
/**
 * Reports whether a conversation with the given id exists.
 *
 * @param conversationId - Id of the conversation.
 */
hasConversation(conversationId: string): Promise<boolean>;
/**
 * Removes a single conversation.
 *
 * @param conversationId - Id of the conversation to remove.
 */
deleteConversation(conversationId: string): Promise<void>;
/**
 * Adds a message to a conversation's history.
 * BaseModel uses this both for the incoming user query and for the AI
 * response.
 *
 * @param conversationId - Id of the conversation to add to.
 * @param entry - The message to add.
 */
addEntryToConversation(conversationId: string, entry: Message): Promise<void>;
/**
 * Removes all stored conversations.
 * NOTE(review): inferred from the name; confirm whether anything else is
 * cleared as well.
 */
clearConversations(): Promise<void>;
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { ExtractChunkData, InsertChunkData } from '../types.js';

export interface BaseDb {
export interface BaseVectorDatabase {
init({ dimensions }: { dimensions: number }): Promise<void>;
insertChunks(chunks: InsertChunkData[]): Promise<number>;
similaritySearch(query: number[], k: number): Promise<ExtractChunkData[]>;
Expand Down
5 changes: 3 additions & 2 deletions core/embedjs-interfaces/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,12 @@ export type SourceDetail = {
source: string;
};

export type LoaderList = {
export type LoaderListEntry = {
type: string;
uniqueId: string;
chunksProcessed: number;
loaderMetadata: Record<string, unknown>;
}[];
};

export type Message = {
id: string;
Expand Down
4 changes: 2 additions & 2 deletions core/embedjs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@
"@llm-tools/embedjs-interfaces": "0.1.13",
"@llm-tools/embedjs-utils": "0.1.13",
"debug": "^4.3.7",
"langchain": "^0.3.4",
"langchain": "^0.3.5",
"md5": "^2.3.0",
"mime": "^4.0.4",
"stream-mime-type": "^2.0.0"
},
"devDependencies": {
"@types/debug": "^4.1.12",
"@types/md5": "^2.3.5",
"@types/node": "^22.8.1"
"@types/node": "^22.8.6"
},
"main": "./src/index.js",
"license": "Apache-2.0",
Expand Down
Loading

0 comments on commit 2de6d3b

Please sign in to comment.