Skip to content

Commit

Permalink
Implement enum type
Browse files Browse the repository at this point in the history
  • Loading branch information
allevo committed Sep 5, 2023
1 parent dab335d commit 6289b81
Show file tree
Hide file tree
Showing 9 changed files with 262 additions and 84 deletions.
5 changes: 5 additions & 0 deletions packages/orama/src/components/defaults.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ export async function validateSchema<S extends Schema = Schema>(doc: Document, s
continue
}

if (type === 'enum' && (typeOfValue === 'string' || typeOfValue === 'number')) {
continue
}

const typeOfType = typeof type

if (isVectorType(type as string)) {
Expand Down Expand Up @@ -82,6 +86,7 @@ const IS_ARRAY_TYPE: Record<SearchableType, boolean> = {
string: false,
number: false,
boolean: false,
enum: false,
'string[]': true,
'number[]': true,
'boolean[]': true,
Expand Down
194 changes: 115 additions & 79 deletions packages/orama/src/components/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import type {
ArraySearchableType,
BM25Params,
ComparisonOperator,
EnumComparisonOperator,
IIndex,
Magnitude,
OpaqueDocumentStore,
Expand Down Expand Up @@ -34,6 +35,12 @@ import {
Node as RadixNode,
removeDocumentByWord as radixRemoveDocument,
} from '../trees/radix.js'
import {
create as flatCreate,
insert as flatInsert,
removeDocument as flatRemoveDocument,
filter as flatFilter,
} from '../trees/flat.js'
import { intersect } from '../utils.js'
import { BM25 } from './algorithms.js'
import { getInnerType, getVectorSize, isArrayType, isVectorType } from './defaults.js'
Expand All @@ -44,6 +51,7 @@ import {
InternalDocumentIDStore,
} from './internal-document-id-store.js'
import { getMagnitude } from './cosine-similarity.js'
import { FlatTree } from '../trees/flat.js'

export type FrequencyMap = {
[property: string]: {
Expand All @@ -67,9 +75,23 @@ export type VectorIndex = {
}
}

/**
 * Tagged wrapper around the concrete structure backing one indexed property.
 *
 * The `type` field is the discriminant, so narrowing on it also narrows `node`:
 * - `'radix'`: radix tree node, created for `string` / `string[]` properties
 * - `'avl'`: AVL tree node keyed by number, created for `number` / `number[]` properties
 * - `'bool'`: boolean index, created for `boolean` / `boolean[]` properties
 * - `'flat'`: flat tree, created for `enum` properties
 */
export type Tree =
| { type: 'radix'; node: RadixNode }
| { type: 'avl'; node: AVLNode<number, InternalDocumentID[]> }
| { type: 'bool'; node: BooleanIndex }
| { type: 'flat'; node: FlatTree }

export interface Index extends OpaqueIndex {
sharedInternalDocumentStore: InternalDocumentIDStore
indexes: Record<string, RadixNode | AVLNode<number, InternalDocumentID[]> | BooleanIndex>
indexes: Record<string, Tree>
vectorIndexes: Record<string, VectorIndex>
searchableProperties: string[]
searchablePropertiesWithTypes: Record<string, SearchableType>
Expand Down Expand Up @@ -223,20 +245,23 @@ export async function create(
switch (type) {
case 'boolean':
case 'boolean[]':
index.indexes[path] = { true: [], false: [] }
index.indexes[path] = { type: 'bool', node: { true: [], false: [] } }
break
case 'number':
case 'number[]':
index.indexes[path] = avlCreate<number, InternalDocumentID[]>(0, [])
index.indexes[path] = { type: 'avl', node: avlCreate<number, InternalDocumentID[]>(0, []) }
break
case 'string':
case 'string[]':
index.indexes[path] = radixCreate()
index.indexes[path] = { type: 'radix', node: radixCreate() }
index.avgFieldLength[path] = 0
index.frequencies[path] = {}
index.tokenOccurrences[path] = {}
index.fieldLengths[path] = {}
break
case 'enum':
index.indexes[path] = { type: 'flat', node: flatCreate() }
break
default:
throw createError('INVALID_SCHEMA_TYPE', Array.isArray(type) ? 'array' : (type as unknown as string), path)
}
Expand All @@ -262,27 +287,31 @@ async function insertScalar(
): Promise<void> {
const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id)

switch (schemaType) {
case 'boolean': {
const booleanIndex = index.indexes[prop] as BooleanIndex
booleanIndex[value ? 'true' : 'false'].push(internalId)
const { type, node } = index.indexes[prop]
switch (type) {
case 'bool': {
node[value ? 'true' : 'false'].push(internalId)
break
}
case 'number':
avlInsert(index.indexes[prop] as AVLNode<number, number[]>, value as number, [internalId])
case 'avl':
avlInsert(node, value as number, [internalId])
break
case 'string': {
case 'radix': {
const tokens = await tokenizer.tokenize(value as string, language, prop)
await implementation.insertDocumentScoreParameters(index, prop, internalId, tokens, docsCount)

for (const token of tokens) {
await implementation.insertTokenScoreParameters(index, prop, internalId, tokens, token)

radixInsert(index.indexes[prop] as RadixNode, token, internalId)
radixInsert(node, token, internalId)
}

break
}
case 'flat': {
flatInsert(node, value as ScalarSearchableType, internalId)
break
}
}
}

Expand Down Expand Up @@ -348,30 +377,35 @@ async function removeScalar(
): Promise<boolean> {
const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id)

switch (schemaType) {
case 'number': {
avlRemoveDocument(index.indexes[prop] as AVLNode<number, InternalDocumentID[]>, internalId, value)
const { type, node } = index.indexes[prop]
switch (type) {
case 'avl': {
avlRemoveDocument(node, internalId, value as number)
return true
}
case 'boolean': {
case 'bool': {
const booleanKey = value ? 'true' : 'false'
const position = (index.indexes[prop] as BooleanIndex)[booleanKey].indexOf(internalId)
const position = node[booleanKey].indexOf(internalId)

;(index.indexes[prop] as BooleanIndex)[value ? 'true' : 'false'].splice(position, 1)
node[value ? 'true' : 'false'].splice(position, 1)
return true
}
case 'string': {
case 'radix': {
const tokens = await tokenizer.tokenize(value as string, language, prop)

await implementation.removeDocumentScoreParameters(index, prop, id, docsCount)

for (const token of tokens) {
await implementation.removeTokenScoreParameters(index, prop, token)
radixRemoveDocument(index.indexes[prop] as RadixNode, token, internalId)
radixRemoveDocument(node, token, internalId)
}

return true
}
case 'flat': {
flatRemoveDocument(node, internalId, value as ScalarSearchableType)
return true
}
}
}

Expand Down Expand Up @@ -421,10 +455,13 @@ export async function search<D extends OpaqueDocumentStore, AggValue>(
return []
}

// Perform the search
const rootNode = index.indexes[prop] as RadixNode
const { node, type } = index.indexes[prop]
if (type !== 'radix') {
throw createError('WRONG_SEARCH_PROPERTY_TYPE', prop)
}

const { exact, tolerance } = context.params
const searchResult = radixFind(rootNode, { term, exact, tolerance })
const searchResult = radixFind(node, { term, exact, tolerance })
const ids = new Set<InternalDocumentID>()

for (const key in searchResult) {
Expand All @@ -439,7 +476,7 @@ export async function search<D extends OpaqueDocumentStore, AggValue>(
export async function searchByWhereClause<I extends OpaqueIndex, D extends OpaqueDocumentStore, AggValue>(
context: SearchContext<I, D, AggValue>,
index: Index,
filters: Record<string, boolean | ComparisonOperator>,
filters: Record<string, boolean | ComparisonOperator | EnumComparisonOperator>,
): Promise<number[]> {
const filterKeys = Object.keys(filters)

Expand All @@ -454,29 +491,24 @@ export async function searchByWhereClause<I extends OpaqueIndex, D extends Opaqu
for (const param of filterKeys) {
const operation = filters[param]

if (typeof operation === 'boolean') {
const idx = index.indexes[param] as BooleanIndex
if (typeof index.indexes[param] === 'undefined') {
throw createError('UNKNOWN_FILTER_PROPERTY', param)
}

if (typeof idx === 'undefined') {
throw createError('UNKNOWN_FILTER_PROPERTY', param)
}
const { node, type } = index.indexes[param]

if (type === 'bool') {
const idx = node
const filteredIDs = idx[operation.toString() as keyof BooleanIndex]
filtersMap[param].push(...filteredIDs)
continue
}

if (typeof operation === 'string' || Array.isArray(operation)) {
const idx = index.indexes[param] as RadixNode

if (typeof idx === 'undefined') {
throw createError('UNKNOWN_FILTER_PROPERTY', param)
}

if (type === 'radix' && (typeof operation === 'string' || Array.isArray(operation))) {
for (const raw of [operation].flat()) {
const term = await context.tokenizer.tokenize(raw, context.language, param)
for (const t of term) {
const filteredIDsResults = radixFind(idx, { term: t, exact: true })
const filteredIDsResults = radixFind(node, { term: t, exact: true })
filtersMap[param].push(...Object.values(filteredIDsResults).flat())
}
}
Expand All @@ -490,45 +522,46 @@ export async function searchByWhereClause<I extends OpaqueIndex, D extends Opaqu
throw createError('INVALID_FILTER_OPERATION', operationKeys.length)
}

const operationOpt = operationKeys[0] as keyof ComparisonOperator
const operationValue = operation[operationOpt]

const AVLNode = index.indexes[param] as AVLNode<number, InternalDocumentID[]>

if (typeof AVLNode === 'undefined') {
throw createError('UNKNOWN_FILTER_PROPERTY', param)
if (type === 'flat') {
filtersMap[param].push(...flatFilter(node, operation as EnumComparisonOperator))
continue
}

switch (operationOpt) {
case 'gt': {
const filteredIDs = avlGreaterThan(AVLNode, operationValue, false)
filtersMap[param].push(...filteredIDs)
break
}
case 'gte': {
const filteredIDs = avlGreaterThan(AVLNode, operationValue, true)
filtersMap[param].push(...filteredIDs)
break
}
case 'lt': {
const filteredIDs = avlLessThan(AVLNode, operationValue, false)
filtersMap[param].push(...filteredIDs)
break
}
case 'lte': {
const filteredIDs = avlLessThan(AVLNode, operationValue, true)
filtersMap[param].push(...filteredIDs)
break
}
case 'eq': {
const filteredIDs = avlFind(AVLNode, operationValue) ?? []
filtersMap[param].push(...filteredIDs)
break
}
case 'between': {
const [min, max] = operationValue as number[]
const filteredIDs = avlRangeSearch(AVLNode, min, max)
filtersMap[param].push(...filteredIDs)
if (type === 'avl') {
const operationOpt = operationKeys[0] as keyof ComparisonOperator
const operationValue = (operation as ComparisonOperator)[operationOpt] as number

switch (operationOpt) {
case 'gt': {
const filteredIDs = avlGreaterThan(node, operationValue, false)
filtersMap[param].push(...filteredIDs)
break
}
case 'gte': {
const filteredIDs = avlGreaterThan(node, operationValue, true)
filtersMap[param].push(...filteredIDs)
break
}
case 'lt': {
const filteredIDs = avlLessThan(node, operationValue, false)
filtersMap[param].push(...filteredIDs)
break
}
case 'lte': {
const filteredIDs = avlLessThan(node, operationValue, true)
filtersMap[param].push(...filteredIDs)
break
}
case 'eq': {
const filteredIDs = avlFind(node, operationValue) ?? []
filtersMap[param].push(...filteredIDs)
break
}
case 'between': {
const [min, max] = operationValue as unknown as number[]
const filteredIDs = avlRangeSearch(node, min, max)
filtersMap[param].push(...filteredIDs)
}
}
}
}
Expand Down Expand Up @@ -576,15 +609,18 @@ export async function load<R = unknown>(sharedInternalDocumentStore: InternalDoc
const vectorIndexes: Index['vectorIndexes'] = {}

for (const prop of Object.keys(rawIndexes)) {
const value = rawIndexes[prop]

if (!('word' in value)) {
indexes[prop] = value
const { node, type } = rawIndexes[prop]

if (type !== 'radix') {
indexes[prop] = rawIndexes[prop]
continue
}

indexes[prop] = loadNode(value)
indexes[prop] = {
type: 'radix',

node: loadNode(node)
}
}

for (const idx of Object.keys(rawVectorIndexes)) {
Expand Down
1 change: 1 addition & 0 deletions packages/orama/src/components/sorter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ function innerCreate(
type: type,
}
break
case 'enum':
case 'boolean[]':
case 'number[]':
case 'string[]':
Expand Down
1 change: 1 addition & 0 deletions packages/orama/src/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const errors = {
INVALID_VECTOR_SIZE: `Vector size must be a number greater than 0. Got "%s" instead.`,
INVALID_VECTOR_VALUE: `Vector value must be a number greater than 0. Got "%s" instead.`,
INVALID_INPUT_VECTOR: `Property "%s" was declared as a %s-dimentional vector, but got a %s-dimentional vector instead.\nInput vectors must be of the size declared in the schema, as calculating similarity between vectors of different sizes can lead to unexpected results.`,
WRONG_SEARCH_PROPERTY_TYPE: `Property "%s" is not searchable. Only "string" properties are searchable.`,
}

export type ErrorCode = keyof typeof errors
Expand Down
4 changes: 4 additions & 0 deletions packages/orama/src/methods/insert.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ async function innerInsert(orama: Orama, doc: Document, language?: string, skipH
continue
}

if (expectedType === 'enum' && (actualType === 'string' || actualType === 'number')) {
continue
}

if (actualType !== expectedType) {
throw createError('INVALID_DOCUMENT_PROPERTY', key, expectedType, actualType)
}
Expand Down
Loading

0 comments on commit 6289b81

Please sign in to comment.