Skip to content

Commit

Permalink
fixup! perf: use internal id instead of string id
Browse files Browse the repository at this point in the history
  • Loading branch information
H4ad committed Jul 11, 2023
1 parent 3dc5bd4 commit 7f16262
Show file tree
Hide file tree
Showing 20 changed files with 197 additions and 143 deletions.
1 change: 1 addition & 0 deletions packages/orama/src/components.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ export * as documentsStore from './components/documents-store.js'
export * as index from './components/index.js'
export * as tokenizer from './components/tokenizer/index.js'
export * as sorter from './components/sorter.js'
export * as internalDocumentIDStore from './components/internal-document-id-store.js'
8 changes: 4 additions & 4 deletions packages/orama/src/components/documents-store.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { InternalDocumentStore, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from "./internal-document-store.js";
import { DocumentID, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from "./internal-document-id-store.js";
import { Document, IDocumentsStore, OpaqueDocumentStore } from "../types.js";

export interface DocumentsStore extends OpaqueDocumentStore {
Expand All @@ -23,7 +23,7 @@ export async function get(store: DocumentsStore, id: string): Promise<Document |
return store.docs[internalId]
}

export async function getMultiple(store: DocumentsStore, ids: InternalDocumentStore[]): Promise<(Document | undefined)[]> {
export async function getMultiple(store: DocumentsStore, ids: DocumentID[]): Promise<(Document | undefined)[]> {
const found: (Document | undefined)[] = Array.from({ length: ids.length })

for (let i = 0; i < ids.length; i++) {
Expand All @@ -38,7 +38,7 @@ export async function getAll(store: DocumentsStore): Promise<Record<string, Docu
return store.docs as unknown as Record<string, Document>
}

export async function store(store: DocumentsStore, id: InternalDocumentStore, doc: Document): Promise<boolean> {
export async function store(store: DocumentsStore, id: DocumentID, doc: Document): Promise<boolean> {
const internalId = getInternalDocumentId(store.sharedInternalDocumentStore, id);

if (typeof store.docs[internalId] !== 'undefined') {
Expand All @@ -51,7 +51,7 @@ export async function store(store: DocumentsStore, id: InternalDocumentStore, do
return true
}

export async function remove(store: DocumentsStore, id: InternalDocumentStore): Promise<boolean> {
export async function remove(store: DocumentsStore, id: DocumentID): Promise<boolean> {
const internalId = getInternalDocumentId(store.sharedInternalDocumentStore, id);

if (typeof store.docs[internalId] === 'undefined') {
Expand Down
3 changes: 2 additions & 1 deletion packages/orama/src/components/groups.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import type { Orama, ScalarSearchableValue, TokenScore, GroupByParams, GroupResult, Result, Reduce } from '../types.js'
import { createError } from '../errors.js'
import { getNested, intersect } from '../utils.js'
import { getDocumentIdFromInternalId } from "./internal-document-id-store.js";

interface PropertyGroup {
property: string
Expand Down Expand Up @@ -47,7 +48,7 @@ export async function getGroups<AggValue>(
}
}

const allIDs = results.map(([id]) => id)
const allIDs = results.map(([id]) => getDocumentIdFromInternalId(orama.internalDocumentIDStore, id))

// allDocs is already sorted by the sortBy algorithm
// We leverage on that to limit the number of documents returned
Expand Down
6 changes: 3 additions & 3 deletions packages/orama/src/components/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { InternalDocumentStore, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from "./internal-document-store.js";
import { DocumentID, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from "./internal-document-id-store.js";
import { createError } from "../errors.js";
import { create as avlCreate, find as avlFind, greaterThan as avlGreaterThan, insert as avlInsert, lessThan as avlLessThan, Node as AVLNode, rangeSearch as avlRangeSearch, removeDocument as avlRemoveDocument } from "../trees/avl.js";
import { create as radixCreate, find as radixFind, insert as radixInsert, Node as RadixNode, removeDocumentByWord as radixRemoveDocument } from "../trees/radix.js";
Expand Down Expand Up @@ -100,7 +100,7 @@ export async function calculateResultScores<I extends OpaqueIndex, D extends Opa
index: Index,
prop: string,
term: string,
ids: InternalDocumentStore[],
ids: DocumentID[],
): Promise<TokenScore[]> {
const documentIDs = Array.from(ids)

Expand Down Expand Up @@ -143,7 +143,7 @@ export async function create(
): Promise<Index> {
if (!index) {
index = {
sharedInternalDocumentStore: orama.internalDocumentStore,
sharedInternalDocumentStore: orama.internalDocumentIDStore,
indexes: {},
searchableProperties: [],
searchablePropertiesWithTypes: {},
Expand Down
66 changes: 66 additions & 0 deletions packages/orama/src/components/internal-document-id-store.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import { Orama } from '../types.js';

/** Identifier accepted by the id-store helpers: either a string id or a number. */
export type DocumentID = string | number;
/** Numeric identifier returned by `getInternalDocumentId`. */
export type InternalDocumentID = number;

/**
 * Two-way mapping between string document ids and numeric internal ids.
 *
 * Internal ids are 1-based: the string id kept at `internalIdToId[n - 1]`
 * is the one mapped to internal id `n` in `idToInternalId`.
 */
export type InternalDocumentIDStore = {
// String id -> internal id.
idToInternalId: Map<string, number>;
// Internal id -> string id, stored at index `internalId - 1`.
internalIdToId: string[];
// Produces the value that is persisted for this store.
save: (store: InternalDocumentIDStore) => unknown;
// Rebuilds `orama.internalDocumentIDStore` from a value previously produced by `save`.
load: (orama: Orama, raw: unknown) => void;
};

/**
 * Builds an empty id store: no ids registered yet, with the module's
 * `save` and `load` functions attached.
 *
 * @returns A fresh store with an empty map and an empty lookup array.
 */
export function createInternalDocumentIDStore(): InternalDocumentIDStore {
  const idToInternalId = new Map<string, number>();
  const internalIdToId: string[] = [];

  return { idToInternalId, internalIdToId, save, load };
}

/**
 * Produces the persistable form of an id store.
 *
 * Only the `internalIdToId` array is emitted; it is returned by reference,
 * not copied.
 *
 * @param store - The id store to serialize.
 * @returns An object with a single `internalIdToId` property.
 */
export function save(store: InternalDocumentIDStore): unknown {
  const { internalIdToId } = store;

  return { internalIdToId };
}

/**
 * Restores `orama.internalDocumentIDStore` from a value produced by `save`.
 *
 * The existing map is cleared and the lookup array is replaced with a new
 * one; every persisted id at index `i` is registered with internal id `i + 1`.
 *
 * @param orama - Instance whose id store is overwritten.
 * @param raw - Persisted data; it is read as an object carrying `internalIdToId`.
 */
export function load(orama: Orama, raw: unknown): void {
  const { internalIdToId: persistedIds } = raw as InternalDocumentIDStore;
  const target = orama.internalDocumentIDStore;

  target.idToInternalId.clear();
  target.internalIdToId = [];

  for (const [index, documentId] of persistedIds.entries()) {
    // Internal ids are 1-based, array positions are 0-based.
    target.idToInternalId.set(documentId, index + 1);
    target.internalIdToId.push(documentId);
  }
}

/**
 * Resolves a document id to its numeric internal id.
 *
 * - A number is returned unchanged.
 * - A string that is already registered returns its stored internal id.
 * - An unregistered string is assigned the next internal id (current map
 *   size + 1), recorded in both directions, and that id is returned.
 *
 * @param store - The id store to look up and, if needed, extend.
 * @param id - A string id or a number.
 * @returns The internal id for `id`.
 */
export function getInternalDocumentId(store: InternalDocumentIDStore, id: DocumentID): InternalDocumentID {
  if (typeof id !== "string") {
    return id;
  }

  const known = store.idToInternalId.get(id);
  if (known) {
    return known;
  }

  // Ids are handed out sequentially starting at 1.
  const assigned = store.idToInternalId.size + 1;
  store.internalIdToId.push(id);
  store.idToInternalId.set(id, assigned);

  return assigned;
}

/**
 * Resolves a numeric internal id back to the string document id it was
 * assigned to.
 *
 * Internal ids are 1-based positions into `store.internalIdToId`, so a valid
 * id is an integer between 1 and the array length (inclusive). Anything else
 * (zero, negatives, fractions, `NaN`, or a value past the end) is rejected
 * instead of silently yielding `undefined`.
 *
 * @param store - The id store to look up.
 * @param internalId - The internal id to resolve.
 * @returns The string id registered for `internalId`.
 * @throws Error if `internalId` is not a valid internal id for this store.
 */
export function getDocumentIdFromInternalId(store: InternalDocumentIDStore, internalId: InternalDocumentID): string {
  const isValid =
    Number.isInteger(internalId) && internalId >= 1 && internalId <= store.internalIdToId.length;

  if (!isValid) {
    throw new Error(`Invalid internalId ${internalId}`);
  }

  // Internal ids start at 1, array indexes at 0.
  return store.internalIdToId[internalId - 1];
}

30 changes: 0 additions & 30 deletions packages/orama/src/components/internal-document-store.ts

This file was deleted.

6 changes: 3 additions & 3 deletions packages/orama/src/components/sorter.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { InternalDocumentStore, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from "./internal-document-store.js";
import { DocumentID, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from "./internal-document-id-store.js";
import { createError } from "../errors.js";
import { ISorter, OpaqueSorter, Orama, Schema, SorterConfig, SorterParams, SortType, SortValue } from "../types.js";

Expand All @@ -20,7 +20,7 @@ export type DefaultSorter = ISorter<Sorter>

function innerCreate(orama: Orama, schema: Schema, sortableDeniedProperties: string[], prefix: string): Sorter {
const sorter: Sorter = {
sharedInternalDocumentStore: orama.internalDocumentStore,
sharedInternalDocumentStore: orama.internalDocumentIDStore,
enabled: true,
sortableProperties: [],
sortablePropertiesWithTypes: {},
Expand Down Expand Up @@ -143,7 +143,7 @@ async function insert(
}
}

async function remove(sorter: Sorter, prop: string, id: InternalDocumentStore) {
async function remove(sorter: Sorter, prop: string, id: DocumentID) {
if (!sorter.enabled) {
return
}
Expand Down
4 changes: 2 additions & 2 deletions packages/orama/src/methods/create.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { createDocumentsStore } from '../components/documents-store.js'
import { OBJECT_COMPONENTS, FUNCTION_COMPONENTS, SINGLE_OR_ARRAY_COMPONENTS } from '../components/hooks.js'
import { createIndex } from '../components/index.js'
import { createTokenizer } from '../components/tokenizer/index.js'
import { createInternalDocumentIDStore, InternalDocumentStore, InternalDocumentID, InternalDocumentIDStore } from "../components/internal-document-store.js";
import { createInternalDocumentIDStore } from "../components/internal-document-id-store.js";
import { createError } from '../errors.js'
import { uniqueId } from '../utils.js'
import {
Expand Down Expand Up @@ -159,7 +159,7 @@ export async function create<P extends ProvidedTypes>({
index,
sorter,
documentsStore,
internalDocumentStore,
internalDocumentIDStore: internalDocumentStore,
getDocumentProperties,
getDocumentIndexId,
validateSchema,
Expand Down
13 changes: 9 additions & 4 deletions packages/orama/src/methods/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { getFacets } from '../components/facets.js'
import { intersectFilteredIDs } from '../components/filters.js'
import { getGroups } from '../components/groups.js'
import { runAfterSearch } from '../components/hooks.js'
import { InternalDocumentID } from "../components/internal-document-store.js";
import { getDocumentIdFromInternalId, InternalDocumentID } from "../components/internal-document-id-store.js";
import { createError } from '../errors.js'
import {
BM25Params,
Expand Down Expand Up @@ -247,7 +247,12 @@ export async function search<AggValue = Result[]>(
}

if (typeof results !== 'undefined') {
searchResult.hits = results.filter(Boolean)
for (const result of results) {
if (!result) continue;

result.id = getDocumentIdFromInternalId(orama.internalDocumentIDStore, +result.id);
searchResult.hits.push(result);
}
}

if (shouldCalculateFacets) {
Expand Down Expand Up @@ -318,7 +323,7 @@ async function fetchDocumentsWithDistinct(
continue
}

results.push({ id, score, document: doc! })
results.push({ id: id.toString(), score, document: doc! })
resultIDs.add(id)

// reached the limit, break the loop
Expand Down Expand Up @@ -361,7 +366,7 @@ async function fetchDocuments(
// We retrieve the full document only AFTER making sure that we really want it.
// We never retrieve the full document preventively.
const fullDoc = await orama.documentsStore.get(docs, id)
results[i] = { id, score, document: fullDoc! }
results[i] = { id: id.toString(), score, document: fullDoc! }
resultIDs.add(id)
}
}
Expand Down
4 changes: 4 additions & 0 deletions packages/orama/src/methods/serialization.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
import { Orama } from '../types.js'

/** Serialized snapshot of an Orama instance, as returned by `save` and consumed by `load`. */
export interface RawData {
// Output of `orama.internalDocumentIDStore.save`.
internalIdStore: unknown
// Output of `orama.index.save`.
index: unknown
// Output of `orama.documentsStore.save`.
docs: unknown
// Output of `orama.sorter.save`.
sorting: unknown
}

/**
 * Restores an Orama instance from a serialized snapshot.
 *
 * The internal id store is loaded first, then the index, the documents
 * store and the sorter are loaded one after another and assigned to
 * `orama.data`.
 *
 * @param orama - Instance to restore into.
 * @param raw - Snapshot to restore from.
 */
export async function load(orama: Orama, raw: RawData): Promise<void> {
  const { internalIdStore, index: rawIndex, docs: rawDocs, sorting: rawSorting } = raw

  orama.internalDocumentIDStore.load(orama, internalIdStore)

  orama.data.index = await orama.index.load(rawIndex)
  orama.data.docs = await orama.documentsStore.load(rawDocs)
  orama.data.sorting = await orama.sorter.load(rawSorting)
}

export async function save(orama: Orama): Promise<RawData> {
return {
internalIdStore: orama.internalDocumentIDStore.save(orama.internalDocumentIDStore),
index: await orama.index.save(orama.data.index),
docs: await orama.documentsStore.save(orama.data.docs),
sorting: await orama.sorter.save(orama.data.sorting),
Expand Down
2 changes: 1 addition & 1 deletion packages/orama/src/trees/radix.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { syncBoundedLevenshtein } from "../components/levenshtein.js";
import { InternalDocumentID } from "../components/internal-document-store.js";
import { InternalDocumentID } from "../components/internal-document-id-store.js";
import { getOwnProperty } from "../utils.js";

export interface Node {
Expand Down
16 changes: 8 additions & 8 deletions packages/orama/src/types.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Language } from "./components/tokenizer/languages.js";
import { InternalDocumentStore, InternalDocumentID, InternalDocumentIDStore } from "./components/internal-document-store.js";
import { DocumentID, InternalDocumentID, InternalDocumentIDStore } from "./components/internal-document-id-store.js";

export type Nullable<T> = T | null

Expand Down Expand Up @@ -301,7 +301,7 @@ export type Result = {
/**
* The id of the document.
*/
id: InternalDocumentID
id: string;
/**
* The score of the document in the search.
*/
Expand Down Expand Up @@ -449,7 +449,7 @@ export interface IIndex<I extends OpaqueIndex = OpaqueIndex> {
index: I,
prop: string,
term: string,
ids: InternalDocumentStore[],
ids: DocumentID[],
): SyncOrAsyncValue<TokenScore[]>

search<D extends OpaqueDocumentStore, AggValue = Result[]>(
Expand All @@ -475,11 +475,11 @@ export interface IDocumentsStore<D extends OpaqueDocumentStore = OpaqueDocumentS
create<S extends Schema, I extends OpaqueIndex, So extends OpaqueSorter>(
orama: Orama<{ Schema: S; Index: I; DocumentStore: D; Sorter: So }>,
): SyncOrAsyncValue<D>
get(store: D, id: InternalDocumentStore): SyncOrAsyncValue<Document | undefined>
getMultiple(store: D, ids: InternalDocumentStore[]): SyncOrAsyncValue<(Document | undefined)[]>
get(store: D, id: DocumentID): SyncOrAsyncValue<Document | undefined>
getMultiple(store: D, ids: DocumentID[]): SyncOrAsyncValue<(Document | undefined)[]>
getAll(store: D): SyncOrAsyncValue<Record<string, Document>>
store(store: D, id: InternalDocumentStore, doc: Document): SyncOrAsyncValue<boolean>
remove(store: D, id: InternalDocumentStore): SyncOrAsyncValue<boolean>
store(store: D, id: DocumentID, doc: Document): SyncOrAsyncValue<boolean>
remove(store: D, id: DocumentID): SyncOrAsyncValue<boolean>
count(store: D): SyncOrAsyncValue<number>

load<R = unknown>(raw: R): SyncOrAsyncValue<D>
Expand Down Expand Up @@ -601,7 +601,7 @@ type Internals<P extends ProvidedTypes> = {
documentsStore: IDocumentsStore<P['DocumentStore']>
sorter: ISorter<P['Sorter']>
data: Data<P['Index'], P['DocumentStore'], P['Sorter']>
internalDocumentStore: InternalDocumentIDStore
internalDocumentIDStore: InternalDocumentIDStore
caches: Record<string, unknown>
[kInsertions]: number | undefined
[kRemovals]: number | undefined
Expand Down
5 changes: 2 additions & 3 deletions packages/orama/tests/array.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import t from 'tap'
import { getInternalDocumentId } from "../src/components/internal-document-store.js";
import { create, getByID, insert, insertMultiple, load, remove, save, search, update } from '../src/index.js'

t.test('create should support array of string', async t => {
Expand Down Expand Up @@ -240,7 +239,7 @@ async function checkSearchTerm(t, db, term, expectedIds) {
})
t.equal(result.hits.length, expectedIds.length)
t.equal(result.count, expectedIds.length)
t.strictSame(new Set(result.hits.map(h => h.id)), new Set(expectedIds.map(getInternalDocumentId)))
t.strictSame(new Set(result.hits.map(h => h.id)), new Set(expectedIds))
}

async function checkSearchWhere(t, db, key, where, expectedIds) {
Expand All @@ -251,7 +250,7 @@ async function checkSearchWhere(t, db, key, where, expectedIds) {
})
t.equal(result.hits.length, expectedIds.length)
t.equal(result.count, expectedIds.length)
t.strictSame(new Set(result.hits.map(h => h.id)), new Set(expectedIds.map(getInternalDocumentId)))
t.strictSame(new Set(result.hits.map(h => h.id)), new Set(expectedIds))
}

async function checkSearchFacets(t: Tap.Test, db, key, facet, expectedFacet) {
Expand Down
Loading

0 comments on commit 7f16262

Please sign in to comment.