diff --git a/packages/orama/src/components/documents-store.ts b/packages/orama/src/components/documents-store.ts index ab16e90a5..0f5c3403a 100644 --- a/packages/orama/src/components/documents-store.ts +++ b/packages/orama/src/components/documents-store.ts @@ -1,31 +1,33 @@ -import { createRecordWithToJson, DocumentID, getInternalDocumentId, InternalDocumentID } from "../document-id.js"; +import { InternalDocumentStore, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from "./internal-document-store.js"; import { Document, IDocumentsStore, OpaqueDocumentStore } from "../types.js"; export interface DocumentsStore extends OpaqueDocumentStore { + sharedInternalDocumentStore: InternalDocumentIDStore; docs: Record, count: number } export type DefaultDocumentsStore = IDocumentsStore -export async function create(): Promise { +export async function create(sharedInternalDocumentStore: InternalDocumentIDStore): Promise { return { - docs: createRecordWithToJson(), + sharedInternalDocumentStore, + docs: {}, count: 0, } } export async function get(store: DocumentsStore, id: string): Promise { - const internalId = getInternalDocumentId(id); + const internalId = getInternalDocumentId(store.sharedInternalDocumentStore, id); return store.docs[internalId] } -export async function getMultiple(store: DocumentsStore, ids: DocumentID[]): Promise<(Document | undefined)[]> { +export async function getMultiple(store: DocumentsStore, ids: InternalDocumentStore[]): Promise<(Document | undefined)[]> { const found: (Document | undefined)[] = Array.from({ length: ids.length }) for (let i = 0; i < ids.length; i++) { - const internalId = getInternalDocumentId(ids[i]) + const internalId = getInternalDocumentId(store.sharedInternalDocumentStore, ids[i]) found[i] = store.docs[internalId] } @@ -36,8 +38,8 @@ export async function getAll(store: DocumentsStore): Promise } -export async function store(store: DocumentsStore, id: DocumentID, doc: Document): Promise { - const internalId = getInternalDocumentId(id); +export async function store(store: DocumentsStore, id: InternalDocumentStore, doc: Document): Promise { + const internalId = getInternalDocumentId(store.sharedInternalDocumentStore, id); if (typeof store.docs[internalId] !== 'undefined') { return false @@ -49,8 +51,8 @@ export async function store(store: DocumentsStore, id: DocumentID, doc: Document return true } -export async function remove(store: DocumentsStore, id: DocumentID): Promise { - const internalId = getInternalDocumentId(id); +export async function remove(store: DocumentsStore, id: InternalDocumentStore): Promise { + const internalId = getInternalDocumentId(store.sharedInternalDocumentStore, id); if (typeof store.docs[internalId] === 'undefined') { return false @@ -66,12 +68,13 @@ export async function count(store: DocumentsStore): Promise { return store.count } -export async function load(raw: R): Promise { +export async function load(sharedInternalDocumentStore: InternalDocumentIDStore, raw: R): Promise { const rawDocument = raw as DocumentsStore return { docs: rawDocument.docs, count: rawDocument.count, + sharedInternalDocumentStore, } } @@ -82,16 +85,16 @@ export async function save(store: DocumentsStore): Promise { } as R } -export async function createDocumentsStore(): Promise { +export async function createDocumentsStore(sharedInternalDocumentStore: InternalDocumentIDStore): Promise { return { - create, + create: create.bind(null, sharedInternalDocumentStore), get, getMultiple, getAll, store, remove, count, - load, + load: load.bind(null, sharedInternalDocumentStore), save, } } diff --git a/packages/orama/src/components/index.ts b/packages/orama/src/components/index.ts index 394ff156c..d4e526f94 100644 --- a/packages/orama/src/components/index.ts +++ b/packages/orama/src/components/index.ts @@ -1,4 +1,4 @@ -import { createRecordWithToJson, DocumentID, getInternalDocumentId, InternalDocumentID } from "../document-id.js"; +import { InternalDocumentStore, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from "./internal-document-store.js"; import { createError } from "../errors.js"; import { create as avlCreate, find as avlFind, greaterThan as avlGreaterThan, insert as avlInsert, lessThan as avlLessThan, Node as AVLNode, rangeSearch as avlRangeSearch, removeDocument as avlRemoveDocument } from "../trees/avl.js"; import { create as radixCreate, find as radixFind, insert as radixInsert, Node as RadixNode, removeDocumentByWord as radixRemoveDocument } from "../trees/radix.js"; @@ -23,6 +23,7 @@ export type BooleanIndex = { } export interface Index extends OpaqueIndex { + sharedInternalDocumentStore: InternalDocumentIDStore indexes: Record | BooleanIndex> searchableProperties: string[] searchablePropertiesWithTypes: Record @@ -41,7 +42,7 @@ export async function insertDocumentScoreParameters( tokens: string[], docsCount: number, ): Promise { - const internalId = getInternalDocumentId(id); + const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id); index.avgFieldLength[prop] = ((index.avgFieldLength[prop] ?? 0) * (docsCount - 1) + tokens.length) / docsCount index.fieldLengths[prop][internalId] = tokens.length @@ -63,7 +64,7 @@ export async function insertTokenScoreParameters( } } - const internalId = getInternalDocumentId(id); + const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id); const tf = tokenFrequency / tokens.length index.frequencies[prop][internalId]![token] = tf @@ -82,7 +83,7 @@ export async function removeDocumentScoreParameters( id: string, docsCount: number, ): Promise { - const internalId = getInternalDocumentId(id); + const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id); index.avgFieldLength[prop] = (index.avgFieldLength[prop] * docsCount - index.fieldLengths[prop][internalId]!) / (docsCount - 1) @@ -99,7 +100,7 @@ export async function calculateResultScores { const documentIDs = Array.from(ids) @@ -117,7 +118,7 @@ export async function calculateResultScores { if (!index) { index = { + sharedInternalDocumentStore: orama.internalDocumentStore, indexes: {}, searchableProperties: [], searchablePropertiesWithTypes: {}, @@ -175,7 +177,7 @@ export async function create( case 'string[]': index.indexes[path] = radixCreate() index.avgFieldLength[path] = 0 - index.frequencies[path] = createRecordWithToJson(); + index.frequencies[path] = {}; index.tokenOccurrences[path] = {} index.fieldLengths[path] = {} break @@ -201,7 +203,7 @@ async function insertScalar( tokenizer: Tokenizer, docsCount: number, ): Promise { - const internalId = getInternalDocumentId(id); + const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id); switch (schemaType) { case 'boolean': { @@ -272,7 +274,7 @@ async function removeScalar( tokenizer: Tokenizer, docsCount: number, ): Promise { - const internalId = getInternalDocumentId(id); + const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id); switch (schemaType) { case 'number': { @@ -471,7 +473,7 @@ export async function getSearchablePropertiesWithTypes(index: Index): Promise(raw: R): Promise { +export async function load(sharedInternalDocumentStore: InternalDocumentIDStore, raw: R): Promise { const { indexes, searchableProperties, @@ -483,6 +485,7 @@ export async function load(raw: R): Promise { } = raw as Index return { + sharedInternalDocumentStore, indexes, searchableProperties, searchablePropertiesWithTypes, @@ -515,7 +518,7 @@ export async function save(index: Index): Promise { } as R } -export async function createIndex(): Promise { +export async function createIndex(sharedInternalDocumentStore: InternalDocumentIDStore): Promise { return { create, insert, @@ -529,7 +532,7 @@ export async function createIndex(): Promise { searchByWhereClause, getSearchableProperties, getSearchablePropertiesWithTypes, - load, + load: load.bind(null, sharedInternalDocumentStore), save, } } diff --git a/packages/orama/src/components/internal-document-store.ts b/packages/orama/src/components/internal-document-store.ts new file mode 100644 index 000000000..b71c282b9 --- /dev/null +++ b/packages/orama/src/components/internal-document-store.ts @@ -0,0 +1,30 @@ +export type InternalDocumentStore = string | number; +export type InternalDocumentID = number; + +const InternalDocumentIDCounter = Symbol('InternalDocumentIDCounter'); + +export type InternalDocumentIDStore = Record & { [InternalDocumentIDCounter]: number }; + +export function createInternalDocumentIDStore(): InternalDocumentIDStore { + return { + [InternalDocumentIDCounter]: 0 + }; +} + +export function getInternalDocumentId(store: InternalDocumentIDStore, id: InternalDocumentStore): InternalDocumentID { + if (typeof id === "string" && isNaN(+id)) { + const internalId = store[id]; + + if (internalId) { + return internalId; + } + + const currentId = ++store[InternalDocumentIDCounter]; + + store[id] = currentId; + + return currentId; + } + + return id as number; +} diff --git a/packages/orama/src/components/sorter.ts b/packages/orama/src/components/sorter.ts index 29276ec42..25b9e1489 100644 --- a/packages/orama/src/components/sorter.ts +++ b/packages/orama/src/components/sorter.ts @@ -1,4 +1,4 @@ -import { createRecordWithToJson, DocumentID, getInternalDocumentId, InternalDocumentID } from "../document-id.js"; +import { InternalDocumentStore, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from "./internal-document-store.js"; import { createError } from "../errors.js"; import { ISorter, OpaqueSorter, Orama, Schema, SorterConfig, SorterParams, SortType, SortValue } from "../types.js"; @@ -9,6 +9,7 @@ interface PropertySort { } export interface Sorter extends OpaqueSorter { + sharedInternalDocumentStore: InternalDocumentIDStore; enabled: boolean sortableProperties: string[] sortablePropertiesWithTypes: Record @@ -17,8 +18,9 @@ export interface Sorter extends OpaqueSorter { export type DefaultSorter = ISorter -function innerCreate(schema: Schema, sortableDeniedProperties: string[], prefix: string): Sorter { +function innerCreate(orama: Orama, schema: Schema, sortableDeniedProperties: string[], prefix: string): Sorter { const sorter: Sorter = { + sharedInternalDocumentStore: orama.internalDocumentStore, enabled: true, sortableProperties: [], sortablePropertiesWithTypes: {}, @@ -35,7 +37,7 @@ function innerCreate(schema: Schema, sortableDeniedProperties: string[], prefix: if (typeActualType === 'object' && !Array.isArray(type)) { // Nested - const ret = innerCreate(type as Schema, sortableDeniedProperties, path) + const ret = innerCreate(orama, type as Schema, sortableDeniedProperties, path) sorter.sortableProperties.push(...ret.sortableProperties) sorter.sorts = { ...sorter.sorts, @@ -55,7 +57,7 @@ function innerCreate(schema: Schema, sortableDeniedProperties: string[], prefix: sorter.sortableProperties.push(path) sorter.sortablePropertiesWithTypes[path] = type sorter.sorts[path] = { - docs: createRecordWithToJson(), + docs: {}, orderedDocs: [], type: type, } @@ -73,14 +75,14 @@ function innerCreate(schema: Schema, sortableDeniedProperties: string[], prefix: return sorter } -async function create(_: Orama, schema: Schema, config?: SorterConfig): Promise { +async function create(orama: Orama, schema: Schema, config?: SorterConfig): Promise { const isSortEnabled = config?.enabled !== false if (!isSortEnabled) { return { disabled: true, } as unknown as Sorter } - return innerCreate(schema, (config || {}).unsortableProperties || [], '') + return innerCreate(orama, schema, (config || {}).unsortableProperties || [], '') } function stringSort(value: SortValue, language: string | undefined, d: [InternalDocumentID, SortValue]): boolean { @@ -122,7 +124,7 @@ async function insert( break } - const internalId = getInternalDocumentId(id); + const internalId = getInternalDocumentId(sorter.sharedInternalDocumentStore, id); // Find the right position to insert the element let index = s.orderedDocs.findIndex(predicate) if (index === -1) { @@ -141,12 +143,12 @@ async function insert( } } -async function remove(sorter: Sorter, prop: string, id: DocumentID) { +async function remove(sorter: Sorter, prop: string, id: InternalDocumentStore) { if (!sorter.enabled) { return } const s = sorter.sorts[prop] as PropertySort; - const internalId = getInternalDocumentId(id); + const internalId = getInternalDocumentId(sorter.sharedInternalDocumentStore, id); const index = s.docs[internalId] delete s.docs[internalId] @@ -216,7 +218,7 @@ async function getSortablePropertiesWithTypes(sorter: Sorter): Promise(raw: R): Promise { +export async function load(sharedInternalDocumentStore: InternalDocumentIDStore, raw: R): Promise { const rawDocument = raw as Sorter if (!rawDocument.enabled) { return { @@ -225,6 +227,7 @@ export async function load(raw: R): Promise { } return { + sharedInternalDocumentStore, sortableProperties: rawDocument.sortableProperties, sortablePropertiesWithTypes: rawDocument.sortablePropertiesWithTypes, sorts: rawDocument.sorts, @@ -247,13 +250,13 @@ export async function save(sorter: Sorter): Promise { } as R } -export async function createSorter(): Promise { +export async function createSorter(sharedInternalDocumentIDs: InternalDocumentIDStore): Promise { return { create, insert, remove, save, - load, + load: load.bind(null, sharedInternalDocumentIDs), sortBy, getSortableProperties, getSortablePropertiesWithTypes, diff --git a/packages/orama/src/document-id.ts b/packages/orama/src/document-id.ts deleted file mode 100644 index da3d440eb..000000000 --- a/packages/orama/src/document-id.ts +++ /dev/null @@ -1,46 +0,0 @@ -export type DocumentID = string | number; -export type InternalDocumentID = number; - -let currentId = 0; -const internalDocumentIdMap: Record = {}; - -export function getInternalDocumentId(id: DocumentID): InternalDocumentID { - if (typeof id === 'string' && isNaN(+id)) { - const internalId = internalDocumentIdMap[id]; - - if (internalId) { - return internalId; - } - - internalDocumentIdMap[id] = ++currentId; - - return currentId; - } - - return id as number; -} - -export type SymbolMapJsonTransformer = { - toJSON: typeof toJSONIntMapped; -} - -export function createRecordWithToJson(): Record { - const map = {}; - - Object.defineProperty(map, 'toJSON', { - value: toJSONIntMapped, - enumerable: false, - configurable: false, - writable: false, - }); - - return map; -} - -export function toJSONIntMapped(this: Record): Record { - return Object.entries(this).reduce((obj, [key, internalId]) => { - obj[key] = this[internalId]; - - return obj; - }, { } as Record) -} diff --git a/packages/orama/src/methods/create.ts b/packages/orama/src/methods/create.ts index 21d4cc94e..f48f92ded 100644 --- a/packages/orama/src/methods/create.ts +++ b/packages/orama/src/methods/create.ts @@ -3,6 +3,7 @@ import { createDocumentsStore } from '../components/documents-store.js' import { OBJECT_COMPONENTS, FUNCTION_COMPONENTS, SINGLE_OR_ARRAY_COMPONENTS } from '../components/hooks.js' import { createIndex } from '../components/index.js' import { createTokenizer } from '../components/tokenizer/index.js' +import { createInternalDocumentIDStore, InternalDocumentStore, InternalDocumentID, InternalDocumentIDStore } from "../components/internal-document-store.js"; import { createError } from '../errors.js' import { uniqueId } from '../utils.js' import { @@ -120,9 +121,11 @@ export async function create

({ throw createError('NO_LANGUAGE_WITH_CUSTOM_TOKENIZER') } - index ||= await createIndex() - sorter ||= await createSorter() - documentsStore ||= await createDocumentsStore() + const internalDocumentStore = createInternalDocumentIDStore(); + + index ||= await createIndex(internalDocumentStore) + sorter ||= await createSorter(internalDocumentStore) + documentsStore ||= await createDocumentsStore(internalDocumentStore) // Validate all other components validateComponents(components) @@ -156,6 +159,7 @@ export async function create

({ index, sorter, documentsStore, + internalDocumentStore, getDocumentProperties, getDocumentIndexId, validateSchema, diff --git a/packages/orama/src/methods/insert.ts b/packages/orama/src/methods/insert.ts index c1366aba5..c241e14f1 100644 --- a/packages/orama/src/methods/insert.ts +++ b/packages/orama/src/methods/insert.ts @@ -1,7 +1,6 @@ import { isArrayType } from "../components.js"; import { runMultipleHook, runSingleHook } from "../components/hooks.js"; import { trackInsertion } from "../components/sync-blocking-checker.js"; -import { getInternalDocumentId, InternalDocumentID } from "../document-id.js"; import { createError } from "../errors.js"; import { Document, Orama, SortValue } from "../types.js"; diff --git a/packages/orama/src/methods/search.ts b/packages/orama/src/methods/search.ts index a96208d51..e36d8d5f1 100644 --- a/packages/orama/src/methods/search.ts +++ b/packages/orama/src/methods/search.ts @@ -3,7 +3,7 @@ import { getFacets } from '../components/facets.js' import { intersectFilteredIDs } from '../components/filters.js' import { getGroups } from '../components/groups.js' import { runAfterSearch } from '../components/hooks.js' -import { InternalDocumentID } from "../document-id.js"; +import { InternalDocumentID } from "../components/internal-document-store.js"; import { createError } from '../errors.js' import { BM25Params, diff --git a/packages/orama/src/trees/radix.ts b/packages/orama/src/trees/radix.ts index 22353fa38..adc5b06ef 100644 --- a/packages/orama/src/trees/radix.ts +++ b/packages/orama/src/trees/radix.ts @@ -1,13 +1,10 @@ -import { syncBoundedLevenshtein } from '../components/levenshtein.js' -import { getInternalDocumentId, InternalDocumentID } from "../document-id.js"; -import { Nullable } from '../types.js' -import { getOwnProperty, syncUniqueId } from '../utils.js' +import { syncBoundedLevenshtein } from "../components/levenshtein.js"; +import { InternalDocumentID } from "../components/internal-document-store.js"; +import { getOwnProperty } from "../utils.js"; export interface Node { - id: InternalDocumentID key: string subWord: string - parent: Nullable children: Record docs: InternalDocumentID[] end: boolean @@ -30,7 +27,6 @@ function serialize(this: Node): object { } function updateParent(node: Node, parent: Node): void { - node.parent = parent.id node.word = parent.word + node.subWord } @@ -112,7 +108,6 @@ function getCommonPrefix(a: string, b: string) { export function create(end = false, subWord = '', key = ''): Node { const node = { - id: getInternalDocumentId(syncUniqueId()), key, subWord, parent: null, diff --git a/packages/orama/src/types.ts b/packages/orama/src/types.ts index 73b84771f..3fd1164e4 100644 --- a/packages/orama/src/types.ts +++ b/packages/orama/src/types.ts @@ -1,5 +1,5 @@ import { Language } from "./components/tokenizer/languages.js"; -import { DocumentID, InternalDocumentID } from "./document-id.js"; +import { InternalDocumentStore, InternalDocumentID, InternalDocumentIDStore } from "./components/internal-document-store.js"; export type Nullable = T | null @@ -449,7 +449,7 @@ export interface IIndex { index: I, prop: string, term: string, - ids: DocumentID[], + ids: InternalDocumentStore[], ): SyncOrAsyncValue search( @@ -475,11 +475,11 @@ export interface IDocumentsStore( orama: Orama<{ Schema: S; Index: I; DocumentStore: D; Sorter: So }>, ): SyncOrAsyncValue - get(store: D, id: DocumentID): SyncOrAsyncValue - getMultiple(store: D, ids: DocumentID[]): SyncOrAsyncValue<(Document | undefined)[]> + get(store: D, id: InternalDocumentStore): SyncOrAsyncValue + getMultiple(store: D, ids: InternalDocumentStore[]): SyncOrAsyncValue<(Document | undefined)[]> getAll(store: D): SyncOrAsyncValue> - store(store: D, id: DocumentID, doc: Document): SyncOrAsyncValue - remove(store: D, id: DocumentID): SyncOrAsyncValue + store(store: D, id: InternalDocumentStore, doc: Document): SyncOrAsyncValue + remove(store: D, id: InternalDocumentStore): SyncOrAsyncValue count(store: D): SyncOrAsyncValue load(raw: R): SyncOrAsyncValue @@ -601,6 +601,7 @@ type Internals

= { documentsStore: IDocumentsStore sorter: ISorter data: Data + internalDocumentStore: InternalDocumentIDStore caches: Record [kInsertions]: number | undefined [kRemovals]: number | undefined diff --git a/packages/orama/src/utils.ts b/packages/orama/src/utils.ts index 059f8bd7d..22e12956a 100644 --- a/packages/orama/src/utils.ts +++ b/packages/orama/src/utils.ts @@ -88,11 +88,6 @@ export async function uniqueId(): Promise { return `${baseId}-${lastId++}` } -// This is only used internally, keep in sync with the previous one -export function syncUniqueId(): string { - return `${baseId}-${lastId++}` -} - export function getOwnProperty(object: Record, property: string): T | undefined { // Checks if `hasOwn` method is defined avoiding errors with older Node.js versions if (Object.hasOwn === undefined) { diff --git a/packages/orama/tests/array.test.ts b/packages/orama/tests/array.test.ts index 1ee9ff6c7..5744b8f60 100644 --- a/packages/orama/tests/array.test.ts +++ b/packages/orama/tests/array.test.ts @@ -1,5 +1,5 @@ import t from 'tap' -import { getInternalDocumentId } from "../src/document-id.js"; +import { getInternalDocumentId } from "../src/components/internal-document-store.js"; import { create, getByID, insert, insertMultiple, load, remove, save, search, update } from '../src/index.js' t.test('create should support array of string', async t => { diff --git a/packages/orama/tests/sort.test.ts b/packages/orama/tests/sort.test.ts index c6602f499..a2fe91c73 100644 --- a/packages/orama/tests/sort.test.ts +++ b/packages/orama/tests/sort.test.ts @@ -24,6 +24,8 @@ t.test('search with sortBy', t => { }, }) + console.log(result); + t.strictSame( result.hits.map(d => d.id), [id5, id2, id1, id3, id4, id6], diff --git a/packages/orama/tests/tree.radix.test.ts b/packages/orama/tests/tree.radix.test.ts index 06e0de071..f0bf6ac8e 100644 --- a/packages/orama/tests/tree.radix.test.ts +++ b/packages/orama/tests/tree.radix.test.ts @@ -9,16 +9,16 @@ import { } from '../src/trees/radix.js' const phrases = [ - { id: '1', doc: 'the quick, brown fox' }, - { id: '2', doc: 'jumps over the lazy dog' }, - { id: '3', doc: 'just in time!' }, - { id: '4', doc: 'there is something wrong in there' }, - { id: '5', doc: 'this is me' }, - { id: '6', doc: 'thought it was sunday' }, - { id: '7', doc: "let's try this trie" }, - { id: '8', doc: 'primo' }, - { id: '9', doc: 'primate' }, - { id: '10', doc: 'prova' }, + { id: 1, doc: 'the quick, brown fox' }, + { id: 2, doc: 'jumps over the lazy dog' }, + { id: 3, doc: 'just in time!' }, + { id: 4, doc: 'there is something wrong in there' }, + { id: 5, doc: 'this is me' }, + { id: 6, doc: 'thought it was sunday' }, + { id: 7, doc: "let's try this trie" }, + { id: 8, doc: 'primo' }, + { id: 9, doc: 'primate' }, + { id: 10, doc: 'prova' }, ] t.test('radix tree', t => {