Skip to content

Commit

Permalink
refactor: move the internal id to orama
Browse files Browse the repository at this point in the history
  • Loading branch information
H4ad committed Jul 11, 2023
1 parent 3a88cd2 commit 3dc5bd4
Show file tree
Hide file tree
Showing 14 changed files with 108 additions and 119 deletions.
31 changes: 17 additions & 14 deletions packages/orama/src/components/documents-store.ts
Original file line number Diff line number Diff line change
@@ -1,31 +1,33 @@
import { createRecordWithToJson, DocumentID, getInternalDocumentId, InternalDocumentID } from "../document-id.js";
import { InternalDocumentStore, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from "./internal-document-store.js";
import { Document, IDocumentsStore, OpaqueDocumentStore } from "../types.js";

export interface DocumentsStore extends OpaqueDocumentStore {
sharedInternalDocumentStore: InternalDocumentIDStore;
docs: Record<InternalDocumentID, Document | undefined>,
count: number
}

export type DefaultDocumentsStore = IDocumentsStore<DocumentsStore>

export async function create(): Promise<DocumentsStore> {
export async function create(sharedInternalDocumentStore: InternalDocumentIDStore): Promise<DocumentsStore> {
return {
docs: createRecordWithToJson<Document | undefined>(),
sharedInternalDocumentStore,
docs: {},
count: 0,
}
}

export async function get(store: DocumentsStore, id: string): Promise<Document | undefined> {
const internalId = getInternalDocumentId(id);
const internalId = getInternalDocumentId(store.sharedInternalDocumentStore, id);

return store.docs[internalId]
}

export async function getMultiple(store: DocumentsStore, ids: DocumentID[]): Promise<(Document | undefined)[]> {
export async function getMultiple(store: DocumentsStore, ids: InternalDocumentStore[]): Promise<(Document | undefined)[]> {
const found: (Document | undefined)[] = Array.from({ length: ids.length })

for (let i = 0; i < ids.length; i++) {
const internalId = getInternalDocumentId(ids[i])
const internalId = getInternalDocumentId(store.sharedInternalDocumentStore, ids[i])
found[i] = store.docs[internalId]
}

Expand All @@ -36,8 +38,8 @@ export async function getAll(store: DocumentsStore): Promise<Record<string, Docu
return store.docs as unknown as Record<string, Document>
}

export async function store(store: DocumentsStore, id: DocumentID, doc: Document): Promise<boolean> {
const internalId = getInternalDocumentId(id);
export async function store(store: DocumentsStore, id: InternalDocumentStore, doc: Document): Promise<boolean> {
const internalId = getInternalDocumentId(store.sharedInternalDocumentStore, id);

if (typeof store.docs[internalId] !== 'undefined') {
return false
Expand All @@ -49,8 +51,8 @@ export async function store(store: DocumentsStore, id: DocumentID, doc: Document
return true
}

export async function remove(store: DocumentsStore, id: DocumentID): Promise<boolean> {
const internalId = getInternalDocumentId(id);
export async function remove(store: DocumentsStore, id: InternalDocumentStore): Promise<boolean> {
const internalId = getInternalDocumentId(store.sharedInternalDocumentStore, id);

if (typeof store.docs[internalId] === 'undefined') {
return false
Expand All @@ -66,12 +68,13 @@ export async function count(store: DocumentsStore): Promise<number> {
return store.count
}

export async function load<R = unknown>(raw: R): Promise<DocumentsStore> {
export async function load<R = unknown>(sharedInternalDocumentStore: InternalDocumentIDStore, raw: R): Promise<DocumentsStore> {
const rawDocument = raw as DocumentsStore

return {
docs: rawDocument.docs,
count: rawDocument.count,
sharedInternalDocumentStore,
}
}

Expand All @@ -82,16 +85,16 @@ export async function save<R = unknown>(store: DocumentsStore): Promise<R> {
} as R
}

export async function createDocumentsStore(): Promise<DefaultDocumentsStore> {
export async function createDocumentsStore(sharedInternalDocumentStore: InternalDocumentIDStore): Promise<DefaultDocumentsStore> {
return {
create,
create: create.bind(null, sharedInternalDocumentStore),
get,
getMultiple,
getAll,
store,
remove,
count,
load,
load: load.bind(null, sharedInternalDocumentStore),
save,
}
}
27 changes: 15 additions & 12 deletions packages/orama/src/components/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { createRecordWithToJson, DocumentID, getInternalDocumentId, InternalDocumentID } from "../document-id.js";
import { InternalDocumentStore, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from "./internal-document-store.js";
import { createError } from "../errors.js";
import { create as avlCreate, find as avlFind, greaterThan as avlGreaterThan, insert as avlInsert, lessThan as avlLessThan, Node as AVLNode, rangeSearch as avlRangeSearch, removeDocument as avlRemoveDocument } from "../trees/avl.js";
import { create as radixCreate, find as radixFind, insert as radixInsert, Node as RadixNode, removeDocumentByWord as radixRemoveDocument } from "../trees/radix.js";
Expand All @@ -23,6 +23,7 @@ export type BooleanIndex = {
}

export interface Index extends OpaqueIndex {
sharedInternalDocumentStore: InternalDocumentIDStore
indexes: Record<string, RadixNode | AVLNode<number, InternalDocumentID[]> | BooleanIndex>
searchableProperties: string[]
searchablePropertiesWithTypes: Record<string, SearchableType>
Expand All @@ -41,7 +42,7 @@ export async function insertDocumentScoreParameters(
tokens: string[],
docsCount: number,
): Promise<void> {
const internalId = getInternalDocumentId(id);
const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id);

index.avgFieldLength[prop] = ((index.avgFieldLength[prop] ?? 0) * (docsCount - 1) + tokens.length) / docsCount
index.fieldLengths[prop][internalId] = tokens.length
Expand All @@ -63,7 +64,7 @@ export async function insertTokenScoreParameters(
}
}

const internalId = getInternalDocumentId(id);
const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id);
const tf = tokenFrequency / tokens.length

index.frequencies[prop][internalId]![token] = tf
Expand All @@ -82,7 +83,7 @@ export async function removeDocumentScoreParameters(
id: string,
docsCount: number,
): Promise<void> {
const internalId = getInternalDocumentId(id);
const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id);

index.avgFieldLength[prop] =
(index.avgFieldLength[prop] * docsCount - index.fieldLengths[prop][internalId]!) / (docsCount - 1)
Expand All @@ -99,7 +100,7 @@ export async function calculateResultScores<I extends OpaqueIndex, D extends Opa
index: Index,
prop: string,
term: string,
ids: DocumentID[],
ids: InternalDocumentStore[],
): Promise<TokenScore[]> {
const documentIDs = Array.from(ids)

Expand All @@ -117,7 +118,7 @@ export async function calculateResultScores<I extends OpaqueIndex, D extends Opa
// Calculate TF-IDF value for each term, in each document, for each index.
const documentIDsLength = documentIDs.length
for (let k = 0; k < documentIDsLength; k++) {
const internalId = getInternalDocumentId(documentIDs[k]);
const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, documentIDs[k]);
const tf = oramaFrequencies?.[internalId]?.[term] ?? 0

const bm25 = BM25(
Expand All @@ -142,6 +143,7 @@ export async function create(
): Promise<Index> {
if (!index) {
index = {
sharedInternalDocumentStore: orama.internalDocumentStore,
indexes: {},
searchableProperties: [],
searchablePropertiesWithTypes: {},
Expand Down Expand Up @@ -175,7 +177,7 @@ export async function create(
case 'string[]':
index.indexes[path] = radixCreate()
index.avgFieldLength[path] = 0
index.frequencies[path] = createRecordWithToJson<FrequencyMap[string][number]>();
index.frequencies[path] = {};
index.tokenOccurrences[path] = {}
index.fieldLengths[path] = {}
break
Expand All @@ -201,7 +203,7 @@ async function insertScalar(
tokenizer: Tokenizer,
docsCount: number,
): Promise<void> {
const internalId = getInternalDocumentId(id);
const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id);

switch (schemaType) {
case 'boolean': {
Expand Down Expand Up @@ -272,7 +274,7 @@ async function removeScalar(
tokenizer: Tokenizer,
docsCount: number,
): Promise<boolean> {
const internalId = getInternalDocumentId(id);
const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id);

switch (schemaType) {
case 'number': {
Expand Down Expand Up @@ -471,7 +473,7 @@ export async function getSearchablePropertiesWithTypes(index: Index): Promise<Re
return index.searchablePropertiesWithTypes
}

export async function load<R = unknown>(raw: R): Promise<Index> {
export async function load<R = unknown>(sharedInternalDocumentStore: InternalDocumentIDStore, raw: R): Promise<Index> {
const {
indexes,
searchableProperties,
Expand All @@ -483,6 +485,7 @@ export async function load<R = unknown>(raw: R): Promise<Index> {
} = raw as Index

return {
sharedInternalDocumentStore,
indexes,
searchableProperties,
searchablePropertiesWithTypes,
Expand Down Expand Up @@ -515,7 +518,7 @@ export async function save<R = unknown>(index: Index): Promise<R> {
} as R
}

export async function createIndex(): Promise<DefaultIndex> {
export async function createIndex(sharedInternalDocumentStore: InternalDocumentIDStore): Promise<DefaultIndex> {
return {
create,
insert,
Expand All @@ -529,7 +532,7 @@ export async function createIndex(): Promise<DefaultIndex> {
searchByWhereClause,
getSearchableProperties,
getSearchablePropertiesWithTypes,
load,
load: load.bind(null, sharedInternalDocumentStore),
save,
}
}
30 changes: 30 additions & 0 deletions packages/orama/src/components/internal-document-store.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/**
 * An id value accepted by the helpers in this module: either an external
 * string id or a numeric internal id.
 *
 * Despite its name, this is the type of an *id*, not of a store.
 */
export type InternalDocumentStore = string | number;

/** Numeric id handed out by {@link getInternalDocumentId}. */
export type InternalDocumentID = number;

// Module-private key under which a store keeps the last internal id it issued.
// Being a symbol, it can never clash with a string document id.
const InternalDocumentIDCounter = Symbol('InternalDocumentIDCounter');

/**
 * Maps external string ids to internal numeric ids, and carries the
 * last-issued id under a private symbol key.
 */
export type InternalDocumentIDStore = Record<string, InternalDocumentID> & { [InternalDocumentIDCounter]: number };

/**
 * Creates an empty id store.
 *
 * @returns A store with no mappings and its counter at 0, so the first id
 *   issued by {@link getInternalDocumentId} is 1.
 */
export function createInternalDocumentIDStore(): InternalDocumentIDStore {
  return {
    [InternalDocumentIDCounter]: 0
  };
}

/**
 * Resolves an id to its internal numeric id, registering it on first use.
 *
 * - A string that does not parse as a number is treated as an external id:
 *   the existing mapping is returned, or the next counter value is assigned,
 *   stored and returned.
 * - Anything else (a number, or a string that parses as a number, including
 *   the empty string) is returned unchanged. Note that such a string is NOT
 *   converted, so at runtime the result is still a string in that case.
 *
 * @param store - Id store to read from and, for unseen ids, write to.
 * @param id - External or internal id.
 * @returns The internal id for `id`.
 */
export function getInternalDocumentId(store: InternalDocumentIDStore, id: InternalDocumentStore): InternalDocumentID {
  if (typeof id === "string" && isNaN(+id)) {
    // Only own properties count as mappings. A plain `store[id]` lookup would
    // find inherited members for ids such as "constructor" or "toString" and
    // return a function instead of a number.
    if (Object.prototype.hasOwnProperty.call(store, id)) {
      return store[id];
    }

    const currentId = ++store[InternalDocumentIDCounter];

    // defineProperty rather than assignment: `store["__proto__"] = n` would be
    // swallowed by the inherited `__proto__` setter and never recorded.
    Object.defineProperty(store, id, {
      value: currentId,
      writable: true,
      enumerable: true,
      configurable: true,
    });

    return currentId;
  }

  return id as number;
}
27 changes: 15 additions & 12 deletions packages/orama/src/components/sorter.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { createRecordWithToJson, DocumentID, getInternalDocumentId, InternalDocumentID } from "../document-id.js";
import { InternalDocumentStore, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from "./internal-document-store.js";
import { createError } from "../errors.js";
import { ISorter, OpaqueSorter, Orama, Schema, SorterConfig, SorterParams, SortType, SortValue } from "../types.js";

Expand All @@ -9,6 +9,7 @@ interface PropertySort<K> {
}

export interface Sorter extends OpaqueSorter {
sharedInternalDocumentStore: InternalDocumentIDStore;
enabled: boolean
sortableProperties: string[]
sortablePropertiesWithTypes: Record<string, SortType>
Expand All @@ -17,8 +18,9 @@ export interface Sorter extends OpaqueSorter {

export type DefaultSorter = ISorter<Sorter>

function innerCreate(schema: Schema, sortableDeniedProperties: string[], prefix: string): Sorter {
function innerCreate(orama: Orama, schema: Schema, sortableDeniedProperties: string[], prefix: string): Sorter {
const sorter: Sorter = {
sharedInternalDocumentStore: orama.internalDocumentStore,
enabled: true,
sortableProperties: [],
sortablePropertiesWithTypes: {},
Expand All @@ -35,7 +37,7 @@ function innerCreate(schema: Schema, sortableDeniedProperties: string[], prefix:

if (typeActualType === 'object' && !Array.isArray(type)) {
// Nested
const ret = innerCreate(type as Schema, sortableDeniedProperties, path)
const ret = innerCreate(orama, type as Schema, sortableDeniedProperties, path)
sorter.sortableProperties.push(...ret.sortableProperties)
sorter.sorts = {
...sorter.sorts,
Expand All @@ -55,7 +57,7 @@ function innerCreate(schema: Schema, sortableDeniedProperties: string[], prefix:
sorter.sortableProperties.push(path)
sorter.sortablePropertiesWithTypes[path] = type
sorter.sorts[path] = {
docs: createRecordWithToJson<number>(),
docs: {},
orderedDocs: [],
type: type,
}
Expand All @@ -73,14 +75,14 @@ function innerCreate(schema: Schema, sortableDeniedProperties: string[], prefix:
return sorter
}

async function create(_: Orama, schema: Schema, config?: SorterConfig): Promise<Sorter> {
async function create(orama: Orama, schema: Schema, config?: SorterConfig): Promise<Sorter> {
const isSortEnabled = config?.enabled !== false
if (!isSortEnabled) {
return {
disabled: true,
} as unknown as Sorter
}
return innerCreate(schema, (config || {}).unsortableProperties || [], '')
return innerCreate(orama, schema, (config || {}).unsortableProperties || [], '')
}

function stringSort(value: SortValue, language: string | undefined, d: [InternalDocumentID, SortValue]): boolean {
Expand Down Expand Up @@ -122,7 +124,7 @@ async function insert(
break
}

const internalId = getInternalDocumentId(id);
const internalId = getInternalDocumentId(sorter.sharedInternalDocumentStore, id);
// Find the right position to insert the element
let index = s.orderedDocs.findIndex(predicate)
if (index === -1) {
Expand All @@ -141,12 +143,12 @@ async function insert(
}
}

async function remove(sorter: Sorter, prop: string, id: DocumentID) {
async function remove(sorter: Sorter, prop: string, id: InternalDocumentStore) {
if (!sorter.enabled) {
return
}
const s = sorter.sorts[prop] as PropertySort<SortValue>;
const internalId = getInternalDocumentId(id);
const internalId = getInternalDocumentId(sorter.sharedInternalDocumentStore, id);

const index = s.docs[internalId]
delete s.docs[internalId]
Expand Down Expand Up @@ -216,7 +218,7 @@ async function getSortablePropertiesWithTypes(sorter: Sorter): Promise<Record<st
return sorter.sortablePropertiesWithTypes
}

export async function load<R = unknown>(raw: R): Promise<Sorter> {
export async function load<R = unknown>(sharedInternalDocumentStore: InternalDocumentIDStore, raw: R): Promise<Sorter> {
const rawDocument = raw as Sorter
if (!rawDocument.enabled) {
return {
Expand All @@ -225,6 +227,7 @@ export async function load<R = unknown>(raw: R): Promise<Sorter> {
}

return {
sharedInternalDocumentStore,
sortableProperties: rawDocument.sortableProperties,
sortablePropertiesWithTypes: rawDocument.sortablePropertiesWithTypes,
sorts: rawDocument.sorts,
Expand All @@ -247,13 +250,13 @@ export async function save<R = unknown>(sorter: Sorter): Promise<R> {
} as R
}

export async function createSorter(): Promise<DefaultSorter> {
export async function createSorter(sharedInternalDocumentIDs: InternalDocumentIDStore): Promise<DefaultSorter> {
return {
create,
insert,
remove,
save,
load,
load: load.bind(null, sharedInternalDocumentIDs),
sortBy,
getSortableProperties,
getSortablePropertiesWithTypes,
Expand Down
Loading

0 comments on commit 3dc5bd4

Please sign in to comment.