Skip to content

Commit

Permalink
fix: issue with max call stack when search (#437)
Browse files Browse the repository at this point in the history
Co-authored-by: Vincent Baronnet <vincent@rawpixel.com>
Co-authored-by: Alexandr Pestryakov <mertico@yandex.ru>
  • Loading branch information
3 people authored Sep 6, 2023
1 parent dab335d commit cdb2d5d
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 25 deletions.
8 changes: 6 additions & 2 deletions packages/orama/src/components/groups.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import type { Orama, ScalarSearchableValue, TokenScore, GroupByParams, GroupResult, Result, Reduce } from '../types.js'
import { createError } from '../errors.js'
import { getNested, intersect } from '../utils.js'
import { getNested, intersect, safeArrayPush } from '../utils.js'
import { getDocumentIdFromInternalId } from './internal-document-id-store.js'

interface PropertyGroup {
Expand Down Expand Up @@ -169,7 +169,11 @@ function calculateCombination(arrs: ScalarSearchableValue[][], index = 0): Scala
const combinations = []
for (const value of head) {
for (const combination of c) {
combinations.push([value, ...combination])
const result = [value];

safeArrayPush(result, combination)

combinations.push(result)
}
}

Expand Down
30 changes: 14 additions & 16 deletions packages/orama/src/components/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import {
Node as RadixNode,
removeDocumentByWord as radixRemoveDocument,
} from '../trees/radix.js'
import { intersect } from '../utils.js'
import { intersect, safeArrayPush } from '../utils.js';
import { BM25 } from './algorithms.js'
import { getInnerType, getVectorSize, isArrayType, isVectorType } from './defaults.js'
import {
Expand Down Expand Up @@ -462,7 +462,7 @@ export async function searchByWhereClause<I extends OpaqueIndex, D extends Opaqu
}

const filteredIDs = idx[operation.toString() as keyof BooleanIndex]
filtersMap[param].push(...filteredIDs)
safeArrayPush(filtersMap[param], filteredIDs);
continue
}

Expand All @@ -477,8 +477,7 @@ export async function searchByWhereClause<I extends OpaqueIndex, D extends Opaqu
const term = await context.tokenizer.tokenize(raw, context.language, param)
for (const t of term) {
const filteredIDsResults = radixFind(idx, { term: t, exact: true })
filtersMap[param].push(...Object.values(filteredIDsResults).flat())
}
safeArrayPush(filtersMap[param], Object.values(filteredIDsResults).flat())}
}

continue
Expand All @@ -499,38 +498,37 @@ export async function searchByWhereClause<I extends OpaqueIndex, D extends Opaqu
throw createError('UNKNOWN_FILTER_PROPERTY', param)
}

let filteredIDs = [];

switch (operationOpt) {
case 'gt': {
const filteredIDs = avlGreaterThan(AVLNode, operationValue, false)
filtersMap[param].push(...filteredIDs)
filteredIDs = avlGreaterThan(AVLNode, operationValue, false)
break
}
case 'gte': {
const filteredIDs = avlGreaterThan(AVLNode, operationValue, true)
filtersMap[param].push(...filteredIDs)
filteredIDs = avlGreaterThan(AVLNode, operationValue, true)
break
}
case 'lt': {
const filteredIDs = avlLessThan(AVLNode, operationValue, false)
filtersMap[param].push(...filteredIDs)
filteredIDs = avlLessThan(AVLNode, operationValue, false)
break
}
case 'lte': {
const filteredIDs = avlLessThan(AVLNode, operationValue, true)
filtersMap[param].push(...filteredIDs)
filteredIDs = avlLessThan(AVLNode, operationValue, true)
break
}
case 'eq': {
const filteredIDs = avlFind(AVLNode, operationValue) ?? []
filtersMap[param].push(...filteredIDs)
filteredIDs = avlFind(AVLNode, operationValue) ?? []
break
}
case 'between': {
const [min, max] = operationValue as number[]
const filteredIDs = avlRangeSearch(AVLNode, min, max)
filtersMap[param].push(...filteredIDs)
filteredIDs = avlRangeSearch(AVLNode, min, max)
break
}
}

safeArrayPush(filtersMap[param], filteredIDs)
}

// AND operation: calculate the intersection between all the IDs in filterMap
Expand Down
4 changes: 2 additions & 2 deletions packages/orama/src/methods/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import {
InternalDocumentID,
} from '../components/internal-document-id-store.js'
import { createError } from '../errors.js'
import { getNanosecondsTime, getNested, sortTokenScorePredicate } from '../utils.js'
import { getNanosecondsTime, getNested, sortTokenScorePredicate, safeArrayPush } from '../utils.js';

const defaultBM25Params: BM25Params = {
k: 1.2,
Expand Down Expand Up @@ -179,7 +179,7 @@ export async function search<AggValue = Result[]>(
// Lookup
const scoreList = await orama.index.search(context, index, prop, term)

context.indexMap[prop][term].push(...scoreList)
safeArrayPush(context.indexMap[prop][term], scoreList);
}

const docIds = context.indexMap[prop]
Expand Down
16 changes: 11 additions & 5 deletions packages/orama/src/trees/avl.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { safeArrayPush } from '../utils.js'

export type Node<K, V> = {
key: K
value: V
Expand Down Expand Up @@ -111,7 +113,7 @@ export function rangeSearch<K, V>(node: Node<K, V>, min: K, max: K): V {
}

if (node.key >= min && node.key <= max) {
result.push(...(node.value as V[]))
safeArrayPush(result, node.value as V[]);
}

if (node.key < max) {
Expand Down Expand Up @@ -139,11 +141,11 @@ export function greaterThan<K, V>(node: Node<K, V>, key: K, inclusive = false):
}

if (inclusive && node.key >= key) {
result.push(...(node.value as V[]))
safeArrayPush(result, node.value as V[]);
}

if (!inclusive && node.key > key) {
result.push(...(node.value as V[]))
safeArrayPush(result, node.value as V[]);
}

traverse(node.left as Node<K, V>)
Expand All @@ -170,11 +172,11 @@ export function lessThan<K, V>(node: Node<K, V>, key: K, inclusive = false): V {
}

if (inclusive && node.key <= key) {
result.push(...(node.value as V[]))
safeArrayPush(result, node.value as V[]);
}

if (!inclusive && node.key < key) {
result.push(...(node.value as V[]))
safeArrayPush(result, node.value as V[]);
}

traverse(node.left as Node<K, V>)
Expand Down Expand Up @@ -358,6 +360,10 @@ export function remove<K, V>(root: Node<K, V> | null, key: K): Node<K, V> | null
export function removeDocument<K, V>(root: Node<K, V[]>, id: V, key: K): void {
const node = getNodeByKey(root, key)!

if (!node) {
return
}

if (node.value.length === 1) {
remove(root, key)
return
Expand Down
26 changes: 26 additions & 0 deletions packages/orama/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,32 @@ const second = BigInt(1e9)

export const isServer = typeof window === 'undefined'

/**
* This value can be increased up to 100_000
* But i don't know if this value change from nodejs to nodejs
* So I will keep a safer value here.
*/
export const MAX_ARGUMENT_FOR_STACK = 65535;

/**
* This method is needed to used because of issues like: https://github.com/oramasearch/orama/issues/301
* that issue is caused because the array that is pushed is huge (>100k)
*
* @example
* ```ts
* safeArrayPush(myArray, [1, 2])
* ```
*/
export function safeArrayPush<T>(arr: T[], newArr: T[]): void {
if (newArr.length < MAX_ARGUMENT_FOR_STACK) {
Array.prototype.push.apply(arr, newArr)
} else {
for (let i = 0; i < newArr.length; i += MAX_ARGUMENT_FOR_STACK) {
Array.prototype.push.apply(arr, newArr.slice(i, i + MAX_ARGUMENT_FOR_STACK))
}
}
}

export function sprintf(template: string, ...args: (string | number)[]): string {
return template.replace(
/%(?:(?<position>\d+)\$)?(?<width>-?\d*\.?\d*)(?<type>[dfs])/g,
Expand Down

1 comment on commit cdb2d5d

@vercel
Copy link

@vercel vercel bot commented on cdb2d5d Sep 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.