Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: issue with max call stack when search #437

Merged
merged 4 commits into from
Sep 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions packages/orama/src/components/groups.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import type { Orama, ScalarSearchableValue, TokenScore, GroupByParams, GroupResult, Result, Reduce } from '../types.js'
import { createError } from '../errors.js'
import { getNested, intersect } from '../utils.js'
import { getNested, intersect, safeArrayPush } from '../utils.js'
import { getDocumentIdFromInternalId } from './internal-document-id-store.js'

interface PropertyGroup {
Expand Down Expand Up @@ -169,7 +169,11 @@ function calculateCombination(arrs: ScalarSearchableValue[][], index = 0): Scala
const combinations = []
for (const value of head) {
for (const combination of c) {
combinations.push([value, ...combination])
const result = [value];

safeArrayPush(result, combination)

combinations.push(result)
micheleriva marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down
30 changes: 14 additions & 16 deletions packages/orama/src/components/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import {
Node as RadixNode,
removeDocumentByWord as radixRemoveDocument,
} from '../trees/radix.js'
import { intersect } from '../utils.js'
import { intersect, safeArrayPush } from '../utils.js';
import { BM25 } from './algorithms.js'
import { getInnerType, getVectorSize, isArrayType, isVectorType } from './defaults.js'
import {
Expand Down Expand Up @@ -462,7 +462,7 @@ export async function searchByWhereClause<I extends OpaqueIndex, D extends Opaqu
}

const filteredIDs = idx[operation.toString() as keyof BooleanIndex]
filtersMap[param].push(...filteredIDs)
safeArrayPush(filtersMap[param], filteredIDs);
continue
}

Expand All @@ -477,8 +477,7 @@ export async function searchByWhereClause<I extends OpaqueIndex, D extends Opaqu
const term = await context.tokenizer.tokenize(raw, context.language, param)
for (const t of term) {
const filteredIDsResults = radixFind(idx, { term: t, exact: true })
filtersMap[param].push(...Object.values(filteredIDsResults).flat())
}
safeArrayPush(filtersMap[param], Object.values(filteredIDsResults).flat())}
}

continue
Expand All @@ -499,38 +498,37 @@ export async function searchByWhereClause<I extends OpaqueIndex, D extends Opaqu
throw createError('UNKNOWN_FILTER_PROPERTY', param)
}

let filteredIDs = [];

switch (operationOpt) {
case 'gt': {
const filteredIDs = avlGreaterThan(AVLNode, operationValue, false)
filtersMap[param].push(...filteredIDs)
filteredIDs = avlGreaterThan(AVLNode, operationValue, false)
break
}
case 'gte': {
const filteredIDs = avlGreaterThan(AVLNode, operationValue, true)
filtersMap[param].push(...filteredIDs)
filteredIDs = avlGreaterThan(AVLNode, operationValue, true)
break
}
case 'lt': {
const filteredIDs = avlLessThan(AVLNode, operationValue, false)
filtersMap[param].push(...filteredIDs)
filteredIDs = avlLessThan(AVLNode, operationValue, false)
break
}
case 'lte': {
const filteredIDs = avlLessThan(AVLNode, operationValue, true)
filtersMap[param].push(...filteredIDs)
filteredIDs = avlLessThan(AVLNode, operationValue, true)
break
}
case 'eq': {
const filteredIDs = avlFind(AVLNode, operationValue) ?? []
filtersMap[param].push(...filteredIDs)
filteredIDs = avlFind(AVLNode, operationValue) ?? []
break
}
case 'between': {
const [min, max] = operationValue as number[]
const filteredIDs = avlRangeSearch(AVLNode, min, max)
filtersMap[param].push(...filteredIDs)
filteredIDs = avlRangeSearch(AVLNode, min, max)
break
}
}

safeArrayPush(filtersMap[param], filteredIDs)
}

// AND operation: calculate the intersection between all the IDs in filterMap
Expand Down
4 changes: 2 additions & 2 deletions packages/orama/src/methods/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import {
InternalDocumentID,
} from '../components/internal-document-id-store.js'
import { createError } from '../errors.js'
import { getNanosecondsTime, getNested, sortTokenScorePredicate } from '../utils.js'
import { getNanosecondsTime, getNested, sortTokenScorePredicate, safeArrayPush } from '../utils.js';

const defaultBM25Params: BM25Params = {
k: 1.2,
Expand Down Expand Up @@ -179,7 +179,7 @@ export async function search<AggValue = Result[]>(
// Lookup
const scoreList = await orama.index.search(context, index, prop, term)

context.indexMap[prop][term].push(...scoreList)
safeArrayPush(context.indexMap[prop][term], scoreList);
}

const docIds = context.indexMap[prop]
Expand Down
16 changes: 11 additions & 5 deletions packages/orama/src/trees/avl.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { safeArrayPush } from '../utils.js'

export type Node<K, V> = {
key: K
value: V
Expand Down Expand Up @@ -111,7 +113,7 @@ export function rangeSearch<K, V>(node: Node<K, V>, min: K, max: K): V {
}

if (node.key >= min && node.key <= max) {
result.push(...(node.value as V[]))
safeArrayPush(result, node.value as V[]);
}

if (node.key < max) {
Expand Down Expand Up @@ -139,11 +141,11 @@ export function greaterThan<K, V>(node: Node<K, V>, key: K, inclusive = false):
}

if (inclusive && node.key >= key) {
result.push(...(node.value as V[]))
safeArrayPush(result, node.value as V[]);
}

if (!inclusive && node.key > key) {
result.push(...(node.value as V[]))
safeArrayPush(result, node.value as V[]);
}

traverse(node.left as Node<K, V>)
Expand All @@ -170,11 +172,11 @@ export function lessThan<K, V>(node: Node<K, V>, key: K, inclusive = false): V {
}

if (inclusive && node.key <= key) {
result.push(...(node.value as V[]))
safeArrayPush(result, node.value as V[]);
}

if (!inclusive && node.key < key) {
result.push(...(node.value as V[]))
safeArrayPush(result, node.value as V[]);
}

traverse(node.left as Node<K, V>)
Expand Down Expand Up @@ -358,6 +360,10 @@ export function remove<K, V>(root: Node<K, V> | null, key: K): Node<K, V> | null
export function removeDocument<K, V>(root: Node<K, V[]>, id: V, key: K): void {
const node = getNodeByKey(root, key)!

if (!node) {
return
}

if (node.value.length === 1) {
remove(root, key)
return
Expand Down
26 changes: 26 additions & 0 deletions packages/orama/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,32 @@ const second = BigInt(1e9)

export const isServer = typeof window === 'undefined'

/**
* This value can be increased up to 100_000
* But i don't know if this value change from nodejs to nodejs
* So I will keep a safer value here.
*/
export const MAX_ARGUMENT_FOR_STACK = 65535;

/**
* This method is needed to used because of issues like: https://github.com/oramasearch/orama/issues/301
* that issue is caused because the array that is pushed is huge (>100k)
*
* @example
* ```ts
* safeArrayPush(myArray, [1, 2])
* ```
*/
export function safeArrayPush<T>(arr: T[], newArr: T[]): void {
if (newArr.length < MAX_ARGUMENT_FOR_STACK) {
Array.prototype.push.apply(arr, newArr)
} else {
for (let i = 0; i < newArr.length; i += MAX_ARGUMENT_FOR_STACK) {
Array.prototype.push.apply(arr, newArr.slice(i, i + MAX_ARGUMENT_FOR_STACK))
}
}
}

export function sprintf(template: string, ...args: (string | number)[]): string {
return template.replace(
/%(?:(?<position>\d+)\$)?(?<width>-?\d*\.?\d*)(?<type>[dfs])/g,
Expand Down