Skip to content

Commit

Permalink
fix(plugin-match-highlight): support tolerance parameter in searchWithHighlight (#489)
Browse files Browse the repository at this point in the history
  • Loading branch information
stearm authored Sep 21, 2023
1 parent b807c47 commit 2dd5e75
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 13 deletions.
46 changes: 33 additions & 13 deletions packages/plugin-match-highlight/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
save,
search
} from '@orama/orama'
import { boundedLevenshtein } from '@orama/orama/internals';

export interface Position {
start: number
Expand Down Expand Up @@ -89,20 +90,39 @@ export async function searchWithHighlight<T extends AnyOrama, ResultDocument = T
): Promise<SearchResultWithHighlight<ResultDocument>> {
const result = await search(orama, params, language)
const queryTokens: string[] = await orama.tokenizer.tokenize(params.term ?? '', language)
const hits = result.hits.map((hit: AnyDocument) =>
Object.assign(hit, {
positions: Object.fromEntries(
Object.entries<any>((orama as OramaWithHighlight<T>).data.positions[hit.id]).map(([propName, tokens]) => [
propName,
Object.fromEntries(
Object.entries(tokens).filter(([token]) => queryTokens.find(queryToken => token.startsWith(queryToken)))
)
])
)
})
)

result.hits = hits
let hitsWithPosition = []
for (const hit of result.hits) {
const hitPositions = Object.entries<any>((orama as OramaWithHighlight<T>).data.positions[hit.id])

let hits: AnyDocument[] = []
for (const [propName, tokens] of hitPositions) {
const matchWithSearchTokens = []

const tokenEntries = Object.entries(tokens)
for (const tokenEntry of tokenEntries) {
const [token] = tokenEntry

for (const queryToken of queryTokens) {
if (params.tolerance) {
const distance = await boundedLevenshtein(token, queryToken, params.tolerance)
if (distance.isBounded) {
matchWithSearchTokens.push(tokenEntry)
break
}
} else if (token.startsWith(queryToken)) {
matchWithSearchTokens.push(tokenEntry)
break
}
}
}
hits.push([propName, Object.fromEntries(matchWithSearchTokens)])
}

hitsWithPosition.push(Object.assign(hit, { positions: Object.fromEntries(hits) }))
}

result.hits = hitsWithPosition

// @ts-ignore
return result
Expand Down
27 changes: 27 additions & 0 deletions packages/plugin-match-highlight/test/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,33 @@ t.test('should retrieve positions', async t => {
t.same(results.hits[0].positions, { text: { hello: [{ start: 0, length: 5 }] } });
});

// Checks that highlight positions are still reported when the search term
// contains a typo and the `tolerance` search parameter is supplied.
t.test('should retrieve positions also with typo, if tolerance is used', async t => {
  const db = await create({
    schema: {
      title: 'string',
      summary: 'string',
      id: 'string',
      slug: 'string'
    } as const,
    components: { afterInsert }
  })

  const doc = {
    title: 'Introduction to React',
    summary: 'React is a popular JavaScript library for building user interfaces, primarily for single-page applications. By utilizing a component-based architecture, it allows developers to build reusable UI components and manage the state of an application seamlessly. This introduction covers its core philosophies, JSX, and the virtual DOM.',
    id: '1a2b3c',
    slug: 'introduction-to-react'
  }
  await insert(db, doc)

  // 'reat' is a misspelling of 'react'; the search is run with tolerance 1.
  const results = await searchWithHighlight(db, { term: 'reat', tolerance: 1 })
  const { positions } = results.hits[0]

  // 'React' starts at offset 16 in the title and at offset 0 in the summary;
  // `id` and `slug` are expected to carry no highlighted tokens.
  const expectedPositions = {
    title: { react: [{ start: 16, length: 5 }] },
    summary: { react: [{ start: 0, length: 5 }] },
    id: {},
    slug: {}
  }

  t.same(positions, expectedPositions)
});

t.test('should work with texts containing constructor and __proto__ properties', async t => {
const schema = {
text: 'string',
Expand Down

1 comment on commit 2dd5e75

@vercel
Copy link

@vercel vercel bot commented on 2dd5e75 Sep 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.