Skip to content

Commit

Permalink
Implements enum[] (#482)
Browse files Browse the repository at this point in the history
  • Loading branch information
allevo authored Sep 19, 2023
1 parent ba29008 commit 40562cc
Show file tree
Hide file tree
Showing 14 changed files with 484 additions and 53 deletions.
1 change: 1 addition & 0 deletions packages/docs/pages/usage/create.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Orama supports the following types:
| `string[]` | An array of strings. | `['red', 'green', 'blue']` |
| `number[]` | An array of numbers. | `[42, 91, 28.5]` |
| `boolean[]` | An array of booleans. | `[true, false, false]` |
| `enum[]` | An array of enums. | `['comedy', 'action', 'romance']` |
| `vector[<size>]` | A vector of numbers to perform vector search on. | `[0.403, 0.192, 0.830]` |

A database can be as simple as:
Expand Down
17 changes: 17 additions & 0 deletions packages/docs/pages/usage/search/facets.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,22 @@ In the search result, `boolean` facets will be returned as an `object` with the
}
```

### Enum facets

If a property is specified as `enum` in the schema, no configuration is required.
In the search result, `enum` facets will be returned as an `object` with the following properties:

```js
{
count: 9, // Total number of values
values: {
'Action': 4, // Number of documents that have this value
'Adventure': 3, // Number of documents that have this value
'Comedy': 2, // Number of documents that have this value
}
}
```

### How facets works on array fields

Orama treats each array element as a single element of the facet:
Expand Down Expand Up @@ -209,3 +225,4 @@ const results = await search(db, {
}
}
```

11 changes: 10 additions & 1 deletion packages/docs/pages/usage/search/filters.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,19 @@ const results = await search(db, {

## Enum operators

The numeric properties support the following operators:
The enum properties support the following operators:

| Operator | Description | Example |
| --------- | ------------------------------ | --------------------------------- |
| `eq` | Equal to | `genre: { eq: 'drama' }` |
| `in` | Contained in the given array | `genre: { in: ['drama', 'horror'] }` |
| `nin` | Not contained in the given array | `genre: { nin: ['comedy'] }` |


## Enum[] operators

The `enum[]` properties support the following operator:

| Operator | Description | Example |
| --------- | ------------------------------ | --------------------------------- |
| `containsAll` | Contains all the given values | `genre: { containsAll: ['comedy', 'action'] }` |
11 changes: 11 additions & 0 deletions packages/orama/src/components/defaults.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ export async function validateSchema<T extends AnyOrama, ResultDocument extends
if (type === 'enum' && (typeof value === 'string' || typeof value === 'number')) {
continue
}
if (type === 'enum[]' && Array.isArray(value)) {
const valueLength = value.length
for (let i = 0; i < valueLength; i++) {
if (typeof value[i] !== 'string' && typeof value[i] !== 'number') {
return prop + '.' + i
}
}
continue
}

if (isVectorType(type)) {
const vectorSize = getVectorSize(type)
Expand Down Expand Up @@ -100,12 +109,14 @@ const IS_ARRAY_TYPE: Record<SearchableType, boolean> = {
'string[]': true,
'number[]': true,
'boolean[]': true,
'enum[]': true,
}

const INNER_TYPE: Record<ArraySearchableType, ScalarSearchableType> = {
'string[]': 'string',
'number[]': 'number',
'boolean[]': 'boolean',
'enum[]': 'enum',
}

export function isVectorType(type: unknown): type is Vector {
Expand Down
19 changes: 13 additions & 6 deletions packages/orama/src/components/facets.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { createError } from '../errors.js'
import type {
AnyOrama,
FacetResult,
Expand All @@ -10,6 +11,8 @@ import type {
} from '../types.js'
import { getNested } from '../utils.js'

// Scalar kinds a single facet value may take when counting boolean, string, or enum facets.
type FacetValue = string | boolean | number

function sortingPredicate(order: FacetSorting = 'desc', a: [string, number], b: [string, number]) {
if (order.toLowerCase() === 'asc') {
return a[1] - b[1]
Expand Down Expand Up @@ -75,19 +78,23 @@ export async function getFacets<T extends AnyOrama>(
break
}
case 'boolean':
case 'enum':
case 'string': {
calculateBooleanOrStringFacet(facets[facet].values, facetValue as string | boolean, propertyType)
calculateBooleanStringOrEnumFacet(facets[facet].values, facetValue as FacetValue, propertyType)
break
}
case 'boolean[]':
case 'enum[]':
case 'string[]': {
const alreadyInsertedValues = new Set<string>()
const innerType = propertyType === 'boolean[]' ? 'boolean' : 'string'
for (const v of facetValue as Array<string | boolean>) {
calculateBooleanOrStringFacet(facets[facet].values, v, innerType, alreadyInsertedValues)
for (const v of facetValue as Array<FacetValue>) {
calculateBooleanStringOrEnumFacet(facets[facet].values, v, innerType, alreadyInsertedValues)
}
break
}
default:
throw createError('FACET_NOT_SUPPORTED', propertyType)
}
}
}
Expand Down Expand Up @@ -137,10 +144,10 @@ function calculateNumberFacet(
}
}

function calculateBooleanOrStringFacet(
function calculateBooleanStringOrEnumFacet(
values: Record<string, number>,
facetValue: string | boolean,
propertyType: 'string' | 'boolean',
facetValue: FacetValue,
propertyType: 'string' | 'boolean' | 'enum',
alreadyInsertedValues?: Set<string>,
) {
// String or boolean based facets
Expand Down
73 changes: 43 additions & 30 deletions packages/orama/src/components/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
import type {
AnyIndexStore,
AnyOrama,
ArraySearchableType,
BM25Params,
ComparisonOperator,
EnumArrComparisonOperator,
EnumComparisonOperator,
IIndex,
ScalarSearchableType,
SearchableType,
SearchableValue,
SearchContext,
Tokenizer,
TokenScore,
TypedDocument,
VectorIndex,
VectorType,
} from '../types.js'
import { createError } from '../errors.js'
import {
create as avlCreate,
Expand All @@ -12,6 +31,7 @@ import {
import {
create as flatCreate,
filter as flatFilter,
filterArr as flatFilterArr,
insert as flatInsert,
removeDocument as flatRemoveDocument,
FlatTree,
Expand All @@ -23,24 +43,7 @@ import {
Node as RadixNode,
removeDocumentByWord as radixRemoveDocument,
} from '../trees/radix.js'
import type {
AnyIndexStore,
AnyOrama,
ArraySearchableType,
BM25Params,
ComparisonOperator,
EnumComparisonOperator,
IIndex,
ScalarSearchableType,
SearchableType,
SearchableValue,
SearchContext,
Tokenizer,
TokenScore,
TypedDocument,
VectorIndex,
VectorType
} from '../types.js'

import { intersect, safeArrayPush } from '../utils.js'
import { BM25 } from './algorithms.js'
import { getMagnitude } from './cosine-similarity.js'
Expand Down Expand Up @@ -76,6 +79,7 @@ export type TreeType =
/**
 * Entry stored for each indexed property: which kind of tree backs it, the tree data itself,
 * and whether the property was declared as an array type in the schema.
 */
export type TTree<T = TreeType, N = unknown> = {
// Tag identifying the tree implementation (e.g. 'Bool', 'AVL', 'Radix', 'Flat').
type: T,
// The tree's underlying data for this property.
node: N
// True when the schema type string contains `[` (an array type such as `enum[]`);
// the where-clause search reads it to choose between `flatFilter` and `flatFilterArr`.
isArray: boolean
}

export type Tree =
Expand Down Expand Up @@ -234,25 +238,27 @@ export async function create<T extends AnyOrama, TSchema extends T['schema']>(
vectors: {},
}
} else {
const isArray = /\[/.test(type as string)
switch (type) {
case 'boolean':
case 'boolean[]':
index.indexes[path] = { type: 'Bool', node: { true: [], false: [] } }
index.indexes[path] = { type: 'Bool', node: { true: [], false: [] }, isArray }
break
case 'number':
case 'number[]':
index.indexes[path] = { type: 'AVL', node: avlCreate<number, InternalDocumentID[]>(0, []) }
index.indexes[path] = { type: 'AVL', node: avlCreate<number, InternalDocumentID[]>(0, []), isArray }
break
case 'string':
case 'string[]':
index.indexes[path] = { type: 'Radix', node: radixCreate() }
index.indexes[path] = { type: 'Radix', node: radixCreate(), isArray }
index.avgFieldLength[path] = 0
index.frequencies[path] = {}
index.tokenOccurrences[path] = {}
index.fieldLengths[path] = {}
break
case 'enum':
index.indexes[path] = { type: 'Flat', node: flatCreate() }
case 'enum[]':
index.indexes[path] = { type: 'Flat', node: flatCreate(), isArray }
break
default:
throw createError('INVALID_SCHEMA_TYPE', Array.isArray(type) ? 'array' : type, path)
Expand Down Expand Up @@ -468,7 +474,7 @@ export async function search<T extends AnyOrama, ResultDocument = TypedDocument<
export async function searchByWhereClause<T extends AnyOrama, ResultDocument = TypedDocument<T>>(
context: SearchContext<T, ResultDocument>,
index: Index,
filters: Record<string, boolean | ComparisonOperator | EnumComparisonOperator>,
filters: Record<string, boolean | ComparisonOperator | EnumComparisonOperator | EnumArrComparisonOperator>,
): Promise<number[]> {
const filterKeys = Object.keys(filters)

Expand All @@ -487,7 +493,7 @@ export async function searchByWhereClause<T extends AnyOrama, ResultDocument = T
throw createError('UNKNOWN_FILTER_PROPERTY', param)
}

const { node, type } = index.indexes[param]
const { node, type, isArray } = index.indexes[param]

if (type === 'Bool') {
const idx = node
Expand All @@ -514,7 +520,11 @@ export async function searchByWhereClause<T extends AnyOrama, ResultDocument = T
}

if (type === 'Flat') {
filtersMap[param].push(...flatFilter(node, operation as EnumComparisonOperator))
if (isArray) {
filtersMap[param].push(...flatFilterArr(node, operation as EnumArrComparisonOperator))
} else {
filtersMap[param].push(...flatFilter(node, operation as EnumComparisonOperator))
}
continue
}

Expand Down Expand Up @@ -608,19 +618,21 @@ export async function load<R = unknown>(sharedInternalDocumentStore: InternalDoc
const vectorIndexes: Index['vectorIndexes'] = {}

for (const prop of Object.keys(rawIndexes)) {
const { node, type } = rawIndexes[prop]
const { node, type, isArray } = rawIndexes[prop]

switch (type) {
case 'Radix':
indexes[prop] = {
type: 'Radix',
node: loadRadixNode(node)
node: loadRadixNode(node),
isArray
}
break
case 'Flat':
indexes[prop] = {
type: 'Flat',
node: loadFlatNode(node)
node: loadFlatNode(node),
isArray
}
break
default:
Expand Down Expand Up @@ -684,14 +696,15 @@ export async function save<R = unknown>(index: Index): Promise<R> {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const savedIndexes: any = {}
for (const name of Object.keys(indexes)) {
const {type, node} = indexes[name]
const {type, node, isArray} = indexes[name]
if (type !== 'Flat') {
savedIndexes[name] = indexes[name]
continue
}
savedIndexes[name] = {
type: 'Flat',
node: saveFlatNode(node)
node: saveFlatNode(node),
isArray,
}
}

Expand Down
3 changes: 3 additions & 0 deletions packages/orama/src/components/sorter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ function innerCreate<T extends AnyOrama>(
}
break
case 'enum':
// We don't allow to sort by enums
continue
case 'enum[]':
case 'boolean[]':
case 'number[]':
case 'string[]':
Expand Down
1 change: 1 addition & 0 deletions packages/orama/src/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ const errors = {
INVALID_VECTOR_VALUE: `Vector value must be a number greater than 0. Got "%s" instead.`,
INVALID_INPUT_VECTOR: `Property "%s" was declared as a %s-dimentional vector, but got a %s-dimentional vector instead.\nInput vectors must be of the size declared in the schema, as calculating similarity between vectors of different sizes can lead to unexpected results.`,
WRONG_SEARCH_PROPERTY_TYPE: `Property "%s" is not searchable. Only "string" properties are searchable.`,
FACET_NOT_SUPPORTED: `Facet doens't support the type "%s".`,
}

export type ErrorCode = keyof typeof errors
Expand Down
2 changes: 1 addition & 1 deletion packages/orama/src/methods/insert.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ async function innerInsert<T extends AnyOrama>(
continue
}

if (expectedType === 'enum' && (actualType === 'string' || actualType === 'number')) {
if ((expectedType === 'enum' || expectedType === 'enum[]') && (actualType === 'string' || actualType === 'number')) {
continue
}

Expand Down
22 changes: 21 additions & 1 deletion packages/orama/src/trees/flat.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { InternalDocumentID } from "../components/internal-document-id-store.js"
import { EnumComparisonOperator, Nullable, ScalarSearchableValue } from "../types.js"
import { EnumArrComparisonOperator, EnumComparisonOperator, Nullable, ScalarSearchableValue } from "../types.js"
import { intersect } from "../utils.js"

export interface FlatTree {
numberToDocumentId: Map<ScalarSearchableValue, InternalDocumentID[]>
Expand Down Expand Up @@ -93,3 +94,22 @@ export function filter(root: FlatTree, operation: EnumComparisonOperator): Inter

throw new Error('Invalid operation')
}

/**
 * Resolves an `enum[]` filter against a flat tree.
 *
 * `containsAll` is the only operator handled here: each requested value is looked up in
 * `root.numberToDocumentId` (a value with no entry contributes an empty list) and the
 * collected lists are handed to `intersect`.
 *
 * @param root - Flat tree holding the value-to-document-ids map.
 * @param operation - Filter object that must carry exactly one operator key.
 * @returns The result of `intersect` over the collected id lists
 * (presumably the ids common to every list — confirm against `intersect` in utils).
 * @throws Error with message `Invalid operation` when `operation` does not have exactly
 * one key, or when that key is not `containsAll`.
 */
export function filterArr(root: FlatTree, operation: EnumArrComparisonOperator): InternalDocumentID[] {
  const operators = Object.keys(operation) as Array<keyof EnumArrComparisonOperator>

  // Guard clause: exactly one operator is allowed, and it must be the supported one.
  if (operators.length !== 1 || operators[0] !== 'containsAll') {
    throw new Error('Invalid operation')
  }

  const requiredValues = operation.containsAll!
  const idLists = requiredValues.map((required) => root.numberToDocumentId.get(required) ?? [])
  return intersect(idLists)
}
Loading

1 comment on commit 40562cc

@vercel
Copy link

@vercel vercel bot commented on 40562cc Sep 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.