From a38619ad9f559def1a0dbc13a366be7f56e43245 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vin=C3=ADcius=20Louren=C3=A7o?= Date: Tue, 11 Jul 2023 09:27:07 -0300 Subject: [PATCH] fixup! refactor: move the internal id to orama --- packages/orama/src/components/algorithms.ts | 19 +++-- .../orama/src/components/documents-store.ts | 10 +-- packages/orama/src/components/filters.ts | 6 +- packages/orama/src/components/groups.ts | 2 +- packages/orama/src/components/index.ts | 80 +++++++++++++------ .../components/internal-document-id-store.ts | 50 ++++++------ packages/orama/src/components/sorter.ts | 28 +++---- packages/orama/src/methods/create.ts | 4 +- packages/orama/src/methods/insert.ts | 10 +-- packages/orama/src/methods/remove.ts | 22 +++-- packages/orama/src/methods/search.ts | 17 ++-- packages/orama/src/trees/radix.ts | 6 +- packages/orama/src/types.ts | 22 ++--- 13 files changed, 163 insertions(+), 113 deletions(-) diff --git a/packages/orama/src/components/algorithms.ts b/packages/orama/src/components/algorithms.ts index 1ba7e2c23..c28f8af0c 100644 --- a/packages/orama/src/components/algorithms.ts +++ b/packages/orama/src/components/algorithms.ts @@ -1,12 +1,13 @@ import { createError } from '../errors.js' import { TokenScore, BM25Params } from '../types.js' +import { InternalDocumentID } from './internal-document-id-store.js'; export function prioritizeTokenScores(arrays: TokenScore[][], boost: number, threshold = 1): TokenScore[] { if (boost === 0) { throw createError('INVALID_BOOST_VALUE') } - const tokenMap: Record = {} + const tokenMap = new Map() const mapsLength = arrays.length for (let i = 0; i < mapsLength; i++) { @@ -16,17 +17,23 @@ export function prioritizeTokenScores(arrays: TokenScore[][], boost: number, thr for (let j = 0; j < entriesLength; j++) { const [token, score] = arr[j] const boostScore = score * boost + const oldScore = tokenMap.get(token) - if (token in tokenMap) { - tokenMap[token] *= 1.5 + boostScore + if (oldScore !== undefined) { + tokenMap.set(token, oldScore * 1.5 + boostScore) } else { - tokenMap[token] = boostScore + tokenMap.set(token, boostScore); } } } - const results = Object.keys(tokenMap) - .map(key => [+key, tokenMap[+key]] as TokenScore) + const tokenScores: TokenScore[] = [] + + for (const tokenStore of tokenMap.entries()) { + tokenScores.push(tokenStore); + } + + const results = tokenScores .sort((a, b) => b[1] - a[1]) // If threshold is 1, it means we will return all the results with at least one search term, diff --git a/packages/orama/src/components/documents-store.ts b/packages/orama/src/components/documents-store.ts index 33acba62a..1192a4734 100644 --- a/packages/orama/src/components/documents-store.ts +++ b/packages/orama/src/components/documents-store.ts @@ -1,5 +1,5 @@ -import { DocumentID, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from "./internal-document-id-store.js"; -import { Document, IDocumentsStore, OpaqueDocumentStore } from "../types.js"; +import { DocumentID, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from './internal-document-id-store.js'; +import { Document, IDocumentsStore, OpaqueDocumentStore } from '../types.js'; export interface DocumentsStore extends OpaqueDocumentStore { sharedInternalDocumentStore: InternalDocumentIDStore; @@ -17,7 +17,7 @@ export async function create(sharedInternalDocumentStore: InternalDocumentIDStor } } -export async function get(store: DocumentsStore, id: string): Promise { +export async function get(store: DocumentsStore, id: DocumentID): Promise { const internalId = getInternalDocumentId(store.sharedInternalDocumentStore, id); return store.docs[internalId] @@ -34,8 +34,8 @@ export async function getMultiple(store: DocumentsStore, ids: DocumentID[]): Pro return found } -export async function getAll(store: DocumentsStore): Promise> { - return store.docs as unknown as Record +export async function getAll(store: DocumentsStore): Promise> { + return store.docs as Record; } export async function store(store: DocumentsStore, id: DocumentID, doc: Document): Promise { diff --git a/packages/orama/src/components/filters.ts b/packages/orama/src/components/filters.ts index 76da449fe..5018f7f76 100644 --- a/packages/orama/src/components/filters.ts +++ b/packages/orama/src/components/filters.ts @@ -1,4 +1,6 @@ -export function intersectFilteredIDs(filtered: number[], lookedUp: [number, number][]): [number, number][] { +import { InternalDocumentID } from './internal-document-id-store.js'; + +export function intersectFilteredIDs(filtered: InternalDocumentID[], lookedUp: [InternalDocumentID, number][]): [InternalDocumentID, number][] { const map = new Map() const result: [number, number][] = [] @@ -8,7 +10,7 @@ export function intersectFilteredIDs(filtered: number[], lookedUp: [number, numb for (const [id, score] of lookedUp) { if (map.has(id)) { - result.push([+id, score]) + result.push([id, score]) map.delete(id) } } diff --git a/packages/orama/src/components/groups.ts b/packages/orama/src/components/groups.ts index 2d391d29e..a25566515 100644 --- a/packages/orama/src/components/groups.ts +++ b/packages/orama/src/components/groups.ts @@ -1,7 +1,7 @@ import type { Orama, ScalarSearchableValue, TokenScore, GroupByParams, GroupResult, Result, Reduce } from '../types.js' import { createError } from '../errors.js' import { getNested, intersect } from '../utils.js' -import { getDocumentIdFromInternalId } from "./internal-document-id-store.js"; +import { getDocumentIdFromInternalId } from './internal-document-id-store.js'; interface PropertyGroup { property: string diff --git a/packages/orama/src/components/index.ts b/packages/orama/src/components/index.ts index da79e19ae..a61b4c0eb 100644 --- a/packages/orama/src/components/index.ts +++ b/packages/orama/src/components/index.ts @@ -1,11 +1,41 @@ -import { DocumentID, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from "./internal-document-id-store.js"; -import { createError } from "../errors.js"; -import { create as avlCreate, find as avlFind, greaterThan as avlGreaterThan, insert as avlInsert, lessThan as avlLessThan, Node as AVLNode, rangeSearch as avlRangeSearch, removeDocument as avlRemoveDocument } from "../trees/avl.js"; -import { create as radixCreate, find as radixFind, insert as radixInsert, Node as RadixNode, removeDocumentByWord as radixRemoveDocument } from "../trees/radix.js"; -import { ArraySearchableType, BM25Params, ComparisonOperator, IIndex, OpaqueDocumentStore, OpaqueIndex, Orama, ScalarSearchableType, Schema, SearchableType, SearchableValue, SearchContext, Tokenizer, TokenScore } from "../types.js"; -import { intersect } from "../utils.js"; -import { BM25 } from "./algorithms.js"; -import { getInnerType, isArrayType } from "./defaults.js"; +import { createError } from '../errors.js' +import { + create as avlCreate, + find as avlFind, + greaterThan as avlGreaterThan, + insert as avlInsert, + lessThan as avlLessThan, + Node as AVLNode, + rangeSearch as avlRangeSearch, + removeDocument as avlRemoveDocument, +} from '../trees/avl.js' +import { + create as radixCreate, + find as radixFind, + insert as radixInsert, + Node as RadixNode, + removeDocumentByWord as radixRemoveDocument, +} from '../trees/radix.js' +import { + ArraySearchableType, + BM25Params, + ComparisonOperator, + IIndex, + OpaqueDocumentStore, + OpaqueIndex, + Orama, + ScalarSearchableType, + Schema, + SearchableType, + SearchableValue, + SearchContext, + Tokenizer, + TokenScore, +} from '../types.js' +import { intersect } from '../utils.js' +import { BM25 } from './algorithms.js' +import { getInnerType, isArrayType } from './defaults.js' +import { DocumentID, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from './internal-document-id-store.js' export type FrequencyMap = { [property: string]: { @@ -15,7 +45,7 @@ export type FrequencyMap = { } | undefined } -}; +} export type BooleanIndex = { true: InternalDocumentID[] @@ -38,11 +68,11 @@ export type DefaultIndex = IIndex export async function insertDocumentScoreParameters( index: Index, prop: string, - id: string, + id: DocumentID, tokens: string[], docsCount: number, ): Promise { - const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id); + const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id) index.avgFieldLength[prop] = ((index.avgFieldLength[prop] ?? 0) * (docsCount - 1) + tokens.length) / docsCount index.fieldLengths[prop][internalId] = tokens.length @@ -52,7 +82,7 @@ export async function insertDocumentScoreParameters( export async function insertTokenScoreParameters( index: Index, prop: string, - id: string, + id: DocumentID, tokens: string[], token: string, ): Promise { @@ -64,7 +94,7 @@ export async function insertTokenScoreParameters( } } - const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id); + const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id) const tf = tokenFrequency / tokens.length index.frequencies[prop][internalId]![token] = tf @@ -80,10 +110,10 @@ export async function insertTokenScoreParameters( export async function removeDocumentScoreParameters( index: Index, prop: string, - id: string, + id: DocumentID, docsCount: number, ): Promise { - const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id); + const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id) index.avgFieldLength[prop] = (index.avgFieldLength[prop] * docsCount - index.fieldLengths[prop][internalId]!) / (docsCount - 1) @@ -118,7 +148,7 @@ export async function calculateResultScores, index: Index, prop: string, - id: string, + id: DocumentID, value: SearchableValue, schemaType: ScalarSearchableType, language: string | undefined, tokenizer: Tokenizer, docsCount: number, ): Promise { - const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id); + const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id) switch (schemaType) { case 'boolean': { @@ -216,10 +246,10 @@ async function insertScalar( break case 'string': { const tokens = await tokenizer.tokenize(value as string, language, prop) - await implementation.insertDocumentScoreParameters(index, prop, id, tokens, docsCount) + await implementation.insertDocumentScoreParameters(index, prop, internalId, tokens, docsCount) for (const token of tokens) { - await implementation.insertTokenScoreParameters(index, prop, id, tokens, token) + await implementation.insertTokenScoreParameters(index, prop, internalId, tokens, token) radixInsert(index.indexes[prop] as RadixNode, token, internalId) } @@ -233,7 +263,7 @@ export async function insert( implementation: DefaultIndex, index: Index, prop: string, - id: string, + id: DocumentID, value: SearchableValue, schemaType: SearchableType, language: string | undefined, @@ -267,14 +297,14 @@ async function removeScalar( implementation: IIndex, index: Index, prop: string, - id: string, + id: DocumentID, value: SearchableValue, schemaType: ScalarSearchableType, language: string | undefined, tokenizer: Tokenizer, docsCount: number, ): Promise { - const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id); + const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id) switch (schemaType) { case 'number': { @@ -307,7 +337,7 @@ export async function remove( implementation: DefaultIndex, index: Index, prop: string, - id: string, + id: DocumentID, value: SearchableValue, schemaType: SearchableType, language: string | undefined, diff --git a/packages/orama/src/components/internal-document-id-store.ts b/packages/orama/src/components/internal-document-id-store.ts index b327a1f65..8959d05a2 100644 --- a/packages/orama/src/components/internal-document-id-store.ts +++ b/packages/orama/src/components/internal-document-id-store.ts @@ -1,14 +1,14 @@ import { Orama } from '../types.js'; -export type DocumentID = string | number; -export type InternalDocumentID = number; +export type DocumentID = string | number +export type InternalDocumentID = number export type InternalDocumentIDStore = { - idToInternalId: Map; - internalIdToId: string[]; - save: (store: InternalDocumentIDStore) => unknown; - load: (orama: Orama, raw: unknown) => void; -}; + idToInternalId: Map + internalIdToId: string[] + save: (store: InternalDocumentIDStore) => unknown + load: (orama: Orama, raw: unknown) => void +} export function createInternalDocumentIDStore(): InternalDocumentIDStore { return { @@ -16,51 +16,55 @@ export function createInternalDocumentIDStore(): InternalDocumentIDStore { internalIdToId: [], save, load, - }; + } } export function save(store: InternalDocumentIDStore): unknown { return { internalIdToId: store.internalIdToId, - }; + } } export function load(orama: Orama, raw: unknown): void { - const { internalIdToId } = raw as InternalDocumentIDStore; + const { internalIdToId } = raw as InternalDocumentIDStore - orama.internalDocumentIDStore.idToInternalId.clear(); - orama.internalDocumentIDStore.internalIdToId = []; + orama.internalDocumentIDStore.idToInternalId.clear() + orama.internalDocumentIDStore.internalIdToId = [] for (let i = 0; i < internalIdToId.length; i++) { - orama.internalDocumentIDStore.idToInternalId.set(internalIdToId[i], i + 1); - orama.internalDocumentIDStore.internalIdToId.push(internalIdToId[i]); + orama.internalDocumentIDStore.idToInternalId.set(internalIdToId[i], i + 1) + orama.internalDocumentIDStore.internalIdToId.push(internalIdToId[i]) } } export function getInternalDocumentId(store: InternalDocumentIDStore, id: DocumentID): InternalDocumentID { if (typeof id === "string") { - const internalId = store.idToInternalId.get(id); + const internalId = store.idToInternalId.get(id) if (internalId) { - return internalId; + return internalId } - const currentId = store.idToInternalId.size + 1; + const currentId = store.idToInternalId.size + 1 - store.idToInternalId.set(id, currentId); - store.internalIdToId.push(id); + store.idToInternalId.set(id, currentId) + store.internalIdToId.push(id) + + return currentId + } - return currentId; + if (id > store.internalIdToId.length) { + return getInternalDocumentId(store, id.toString()) } - return id as number; + return id } export function getDocumentIdFromInternalId(store: InternalDocumentIDStore, internalId: InternalDocumentID): string { if (store.internalIdToId.length < internalId) { - throw new Error(`Invalid internalId ${internalId}`); + throw new Error(`Invalid internalId ${internalId}`) } - return store.internalIdToId[internalId - 1]; + return store.internalIdToId[internalId - 1] } diff --git a/packages/orama/src/components/sorter.ts b/packages/orama/src/components/sorter.ts index 27f3b4acd..64ca5b962 100644 --- a/packages/orama/src/components/sorter.ts +++ b/packages/orama/src/components/sorter.ts @@ -1,15 +1,15 @@ -import { DocumentID, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from "./internal-document-id-store.js"; -import { createError } from "../errors.js"; -import { ISorter, OpaqueSorter, Orama, Schema, SorterConfig, SorterParams, SortType, SortValue } from "../types.js"; +import { createError } from '../errors.js' +import { ISorter, OpaqueSorter, Orama, Schema, SorterConfig, SorterParams, SortType, SortValue } from '../types.js' +import { DocumentID, getInternalDocumentId, InternalDocumentID, InternalDocumentIDStore } from './internal-document-id-store.js' interface PropertySort { - docs: Record; + docs: Record orderedDocs: [InternalDocumentID, K][] type: SortType } export interface Sorter extends OpaqueSorter { - sharedInternalDocumentStore: InternalDocumentIDStore; + sharedInternalDocumentStore: InternalDocumentIDStore enabled: boolean sortableProperties: string[] sortablePropertiesWithTypes: Record @@ -86,8 +86,8 @@ async function create(orama: Orama, schema: Schema, config?: SorterConfig): Prom } function stringSort(value: SortValue, language: string | undefined, d: [InternalDocumentID, SortValue]): boolean { - const d1Value = d[1] as string | symbol; - const dId = typeof d1Value === 'symbol' ? d1Value.description! : d1Value; + const d1Value = d[1] as string | symbol + const dId = typeof d1Value === 'symbol' ? d1Value.description! : d1Value return dId.localeCompare(value as string, language) > 0 } @@ -101,7 +101,7 @@ function booleanSort(value: SortValue, d: [InternalDocumentID, SortValue]): bool async function insert( sorter: Sorter, prop: string, - id: string, + id: DocumentID, value: SortValue, schemaType: SortType, language: string | undefined, @@ -124,7 +124,7 @@ async function insert( break } - const internalId = getInternalDocumentId(sorter.sharedInternalDocumentStore, id); + const internalId = getInternalDocumentId(sorter.sharedInternalDocumentStore, id) // Find the right position to insert the element let index = s.orderedDocs.findIndex(predicate) if (index === -1) { @@ -147,8 +147,8 @@ async function remove(sorter: Sorter, prop: string, id: DocumentID) { if (!sorter.enabled) { return } - const s = sorter.sorts[prop] as PropertySort; - const internalId = getInternalDocumentId(sorter.sharedInternalDocumentStore, id); + const s = sorter.sorts[prop] as PropertySort + const internalId = getInternalDocumentId(sorter.sharedInternalDocumentStore, id) const index = s.docs[internalId] delete s.docs[internalId] @@ -163,7 +163,7 @@ async function remove(sorter: Sorter, prop: string, id: DocumentID) { s.orderedDocs.splice(index, 1) } -async function sortBy(sorter: Sorter, docIds: [InternalDocumentID, number][], by: SorterParams): Promise<[InternalDocumentID, number][]> { +async function sortBy(sorter: Sorter, docIds: [DocumentID, number][], by: SorterParams): Promise<[DocumentID, number][]> { if (!sorter.enabled) { throw createError('SORT_DISABLED') } @@ -180,8 +180,8 @@ async function sortBy(sorter: Sorter, docIds: [InternalDocumentID, number][], by // This sort algorithm works leveraging on // that s.docs is a map of docId -> position // If a document is not indexed, it will be not present in the map - const indexOfA = s.docs[a[0]] - const indexOfB = s.docs[b[0]] + const indexOfA = s.docs[getInternalDocumentId(sorter.sharedInternalDocumentStore, a[0])] + const indexOfB = s.docs[getInternalDocumentId(sorter.sharedInternalDocumentStore, b[0])] const isAIndexed = typeof indexOfA !== 'undefined' const isBIndexed = typeof indexOfB !== 'undefined' diff --git a/packages/orama/src/methods/create.ts b/packages/orama/src/methods/create.ts index f481aadec..d8f12873a 100644 --- a/packages/orama/src/methods/create.ts +++ b/packages/orama/src/methods/create.ts @@ -3,7 +3,7 @@ import { createDocumentsStore } from '../components/documents-store.js' import { OBJECT_COMPONENTS, FUNCTION_COMPONENTS, SINGLE_OR_ARRAY_COMPONENTS } from '../components/hooks.js' import { createIndex } from '../components/index.js' import { createTokenizer } from '../components/tokenizer/index.js' -import { createInternalDocumentIDStore } from "../components/internal-document-id-store.js"; +import { createInternalDocumentIDStore } from '../components/internal-document-id-store.js' import { createError } from '../errors.js' import { uniqueId } from '../utils.js' import { @@ -121,7 +121,7 @@ export async function create

({ throw createError('NO_LANGUAGE_WITH_CUSTOM_TOKENIZER') } - const internalDocumentStore = createInternalDocumentIDStore(); + const internalDocumentStore = createInternalDocumentIDStore() index ||= await createIndex(internalDocumentStore) sorter ||= await createSorter(internalDocumentStore) diff --git a/packages/orama/src/methods/insert.ts b/packages/orama/src/methods/insert.ts index c241e14f1..b1c950db8 100644 --- a/packages/orama/src/methods/insert.ts +++ b/packages/orama/src/methods/insert.ts @@ -1,8 +1,8 @@ -import { isArrayType } from "../components.js"; -import { runMultipleHook, runSingleHook } from "../components/hooks.js"; -import { trackInsertion } from "../components/sync-blocking-checker.js"; -import { createError } from "../errors.js"; -import { Document, Orama, SortValue } from "../types.js"; +import { isArrayType } from '../components.js' +import { runMultipleHook, runSingleHook } from '../components/hooks.js' +import { trackInsertion } from '../components/sync-blocking-checker.js' +import { createError } from '../errors.js' +import { Document, Orama, SortValue } from '../types.js' export async function insert(orama: Orama, doc: Document, language?: string, skipHooks?: boolean): Promise { const errorProperty = await orama.validateSchema(doc, orama.schema) diff --git a/packages/orama/src/methods/remove.ts b/packages/orama/src/methods/remove.ts index c6c71af8a..ed45a918f 100644 --- a/packages/orama/src/methods/remove.ts +++ b/packages/orama/src/methods/remove.ts @@ -1,8 +1,9 @@ import { runMultipleHook, runSingleHook } from '../components/hooks.js' +import { DocumentID, getDocumentIdFromInternalId, getInternalDocumentId } from '../components/internal-document-id-store.js'; import { trackRemoval } from '../components/sync-blocking-checker.js' import { Orama } from '../types.js' -export async function remove(orama: Orama, id: string, language?: string, skipHooks?: boolean): Promise { +export async function remove(orama: Orama, id: DocumentID, language?: string, skipHooks?: boolean): Promise { let result = true const { index, docs } = orama.data @@ -11,10 +12,11 @@ export async function remove(orama: Orama, id: string, language?: string, skipHo return false } + const docId = getDocumentIdFromInternalId(orama.internalDocumentIDStore, getInternalDocumentId(orama.internalDocumentIDStore, id)) const docsCount = await orama.documentsStore.count(docs) if (!skipHooks) { - await runSingleHook(orama.beforeRemove, orama, id) + await runSingleHook(orama.beforeRemove, orama, docId) } const indexableProperties = await orama.index.getSearchableProperties(index) @@ -33,7 +35,7 @@ export async function remove(orama: Orama, id: string, language?: string, skipHo await orama.index.beforeRemove?.( orama.data.index, prop, - id, + docId, value, schemaType, language, @@ -55,7 +57,7 @@ export async function remove(orama: Orama, id: string, language?: string, skipHo ) { result = false } - await orama.index.afterRemove?.(orama.data.index, prop, id, value, schemaType, language, orama.tokenizer, docsCount) + await orama.index.afterRemove?.(orama.data.index, prop, docId, value, schemaType, language, orama.tokenizer, docsCount) } const sortableProperties = await orama.sorter.getSortableProperties(orama.data.sorting) @@ -70,7 +72,7 @@ export async function remove(orama: Orama, id: string, language?: string, skipHo } if (!skipHooks) { - await runSingleHook(orama.afterRemove, orama, id) + await runSingleHook(orama.afterRemove, orama, docId) } await orama.documentsStore.remove(orama.data.docs, id) @@ -81,7 +83,7 @@ export async function remove(orama: Orama, id: string, language?: string, skipHo export async function removeMultiple( orama: Orama, - ids: string[], + ids: DocumentID[], batchSize?: number, language?: string, skipHooks?: boolean, @@ -92,8 +94,12 @@ export async function removeMultiple( batchSize = 1000 } + const docIdsForHooks = skipHooks + ? [] + : ids.map(id => getDocumentIdFromInternalId(orama.internalDocumentIDStore, getInternalDocumentId(orama.internalDocumentIDStore, id))) + if (!skipHooks) { - await runMultipleHook(orama.beforeMultipleRemove, orama, ids) + await runMultipleHook(orama.beforeMultipleRemove, orama, docIdsForHooks) } await new Promise((resolve, reject) => { @@ -123,7 +129,7 @@ export async function removeMultiple( }) if (!skipHooks) { - await runMultipleHook(orama.afterMultipleRemove, orama, ids) + await runMultipleHook(orama.afterMultipleRemove, orama, docIdsForHooks) } return result diff --git a/packages/orama/src/methods/search.ts b/packages/orama/src/methods/search.ts index 102bcd421..145e08240 100644 --- a/packages/orama/src/methods/search.ts +++ b/packages/orama/src/methods/search.ts @@ -3,7 +3,7 @@ import { getFacets } from '../components/facets.js' import { intersectFilteredIDs } from '../components/filters.js' import { getGroups } from '../components/groups.js' import { runAfterSearch } from '../components/hooks.js' -import { getDocumentIdFromInternalId, InternalDocumentID } from "../components/internal-document-id-store.js"; +import { getDocumentIdFromInternalId, getInternalDocumentId, InternalDocumentID } from '../components/internal-document-id-store.js'; import { createError } from '../errors.js' import { BM25Params, @@ -22,7 +22,7 @@ import { OpaqueIndex, OpaqueDocumentStore, SearchableValue, TokenScore -} from "../types.js"; +} from '../types.js' import { getNanosecondsTime, getNested, sortTokenScorePredicate } from '../utils.js' const defaultBM25Params: BM25Params = { @@ -153,7 +153,7 @@ export async function search( // If filters are enabled, we need to get the IDs of the documents that match the filters. const hasFilters = Object.keys(params.where ?? {}).length > 0 - let whereFiltersIDs: number[] = [] + let whereFiltersIDs: InternalDocumentID[] = [] if (hasFilters) { whereFiltersIDs = await orama.index.searchByWhereClause(context, index, params.where!) @@ -204,7 +204,7 @@ export async function search( // Get unique doc IDs from uniqueDocsIDs map let uniqueDocsArray = Object.entries(context.uniqueDocsIDs) - .map(([id, score]) => [Number(id), score] as TokenScore); + .map(([id, score]) => [+id, score] as TokenScore) // If filters are enabled, we need to remove the IDs of the documents that don't match the filters. if (hasFilters) { @@ -213,7 +213,7 @@ export async function search( if (params.sortBy) { if (typeof params.sortBy === 'function') { - const ids = uniqueDocsArray.map(([id]) => id) as unknown as string[] + const ids = uniqueDocsArray.map(([id]) => id) const docs = await orama.documentsStore.getMultiple(orama.data.docs, ids) const docsWithIdAndScore: CustomSorterFunctionItem[] = docs.map((d, i) => [ uniqueDocsArray[i][0], @@ -224,6 +224,7 @@ export async function search( uniqueDocsArray = docsWithIdAndScore.map(([id, score]) => [id, score]) } else { uniqueDocsArray = await orama.sorter.sortBy(orama.data.sorting, uniqueDocsArray, params.sortBy) + .then(results => results.map(([id, score]) => [getInternalDocumentId(orama.internalDocumentIDStore, id), score])) } } else { uniqueDocsArray = uniqueDocsArray.sort(sortTokenScorePredicate) @@ -248,10 +249,10 @@ export async function search( if (typeof results !== 'undefined') { for (const result of results) { - if (!result) continue; + if (!result) continue - result.id = getDocumentIdFromInternalId(orama.internalDocumentIDStore, +result.id); - searchResult.hits.push(result); + result.id = getDocumentIdFromInternalId(orama.internalDocumentIDStore, +result.id) + searchResult.hits.push(result) } } diff --git a/packages/orama/src/trees/radix.ts b/packages/orama/src/trees/radix.ts index bf5d1d95f..72072eb46 100644 --- a/packages/orama/src/trees/radix.ts +++ b/packages/orama/src/trees/radix.ts @@ -1,6 +1,6 @@ -import { syncBoundedLevenshtein } from "../components/levenshtein.js"; -import { InternalDocumentID } from "../components/internal-document-id-store.js"; -import { getOwnProperty } from "../utils.js"; +import { syncBoundedLevenshtein } from '../components/levenshtein.js' +import { InternalDocumentID } from '../components/internal-document-id-store.js' +import { getOwnProperty } from '../utils.js' export interface Node { key: string diff --git a/packages/orama/src/types.ts b/packages/orama/src/types.ts index dba655bff..68b0ead0d 100644 --- a/packages/orama/src/types.ts +++ b/packages/orama/src/types.ts @@ -1,5 +1,5 @@ -import { Language } from "./components/tokenizer/languages.js"; -import { DocumentID, InternalDocumentID, InternalDocumentIDStore } from "./components/internal-document-id-store.js"; +import { Language } from './components/tokenizer/languages.js' +import { DocumentID, InternalDocumentID, InternalDocumentIDStore } from './components/internal-document-id-store.js' export type Nullable = T | null @@ -411,7 +411,7 @@ export interface IIndex { implementation: IIndex, index: I, prop: string, - id: string, + id: DocumentID, value: SearchableValue, schemaType: SearchableType, language: string | undefined, @@ -425,7 +425,7 @@ export interface IIndex { implementation: IIndex, index: I, prop: string, - id: string, + id: DocumentID, value: SearchableValue, schemaType: SearchableType, language: string | undefined, @@ -437,12 +437,12 @@ export interface IIndex { insertDocumentScoreParameters( index: I, prop: string, - id: string, + id: DocumentID, tokens: string[], docsCount: number, ): SyncOrAsyncValue - insertTokenScoreParameters(index: I, prop: string, id: string, tokens: string[], token: string): SyncOrAsyncValue - removeDocumentScoreParameters(index: I, prop: string, id: string, docsCount: number): SyncOrAsyncValue + insertTokenScoreParameters(index: I, prop: string, id: DocumentID, tokens: string[], token: string): SyncOrAsyncValue + removeDocumentScoreParameters(index: I, prop: string, id: DocumentID, docsCount: number): SyncOrAsyncValue removeTokenScoreParameters(index: I, prop: string, token: string): SyncOrAsyncValue calculateResultScores( context: SearchContext, @@ -477,7 +477,7 @@ export interface IDocumentsStore get(store: D, id: DocumentID): SyncOrAsyncValue getMultiple(store: D, ids: DocumentID[]): SyncOrAsyncValue<(Document | undefined)[]> - getAll(store: D): SyncOrAsyncValue> + getAll(store: D): SyncOrAsyncValue> store(store: D, id: DocumentID, doc: Document): SyncOrAsyncValue remove(store: D, id: DocumentID): SyncOrAsyncValue count(store: D): SyncOrAsyncValue @@ -500,17 +500,17 @@ export interface ISorter { insert: ( sorter: So, prop: string, - id: string, + id: DocumentID, value: SortValue, schemaType: SortType, language: string | undefined, ) => SyncOrAsyncValue - remove: (sorter: So, prop: string, id: string) => SyncOrAsyncValue + remove: (sorter: So, prop: string, id: DocumentID) => SyncOrAsyncValue load(raw: R): SyncOrAsyncValue save(sorter: So): SyncOrAsyncValue - sortBy(sorter: So, docIds: [InternalDocumentID, number][], by: SorterParams): Promise<[InternalDocumentID, number][]> + sortBy(sorter: So, docIds: [DocumentID, number][], by: SorterParams): Promise<[DocumentID, number][]> getSortableProperties(sorter: So): SyncOrAsyncValue getSortablePropertiesWithTypes(sorter: So): SyncOrAsyncValue>