diff --git a/.swcrc b/.swcrc
index dd1de6e60..ef696c79a 100644
--- a/.swcrc
+++ b/.swcrc
@@ -14,10 +14,6 @@
},
"experimental": {
"keepImportAssertions": true
- },
- "baseUrl": ".",
- "paths": {
- "@stemmer/*": ["./src/stemmer/*"]
}
},
"sourceMaps": true
@@ -41,10 +37,6 @@
},
"experimental": {
"keepImportAssertions": true
- },
- "baseUrl": ".",
- "paths": {
- "@stemmer/*": ["./src/stemmer/*"]
}
},
"sourceMaps": true
diff --git a/benchmarks/src/typo-tolerant-search.js b/benchmarks/src/typo-tolerant-search.js
index 177109788..1834688b4 100644
--- a/benchmarks/src/typo-tolerant-search.js
+++ b/benchmarks/src/typo-tolerant-search.js
@@ -1,5 +1,7 @@
import cronometro from "cronometro";
-import { create, insertBatch, search } from "../../dist/index.js";
+import { isMainThread } from "worker_threads";
+import { create, insertMultiple, search } from "../../dist/index.js";
+import { createTokenizer } from "../../dist/internals.js";
import { formattedEvents } from "./utils/dataset.js";
const db = await create({
@@ -22,15 +24,17 @@ const dbNoStemming = await create({
second: "string",
},
},
- tokenizer: {
- enableStemming: false,
+ components: {
+ tokenizer: await createTokenizer("english", { stemming: false }),
},
});
const first30000Events = formattedEvents.slice(0, 30_000);
-await insertBatch(db, first30000Events);
-await insertBatch(dbNoStemming, first30000Events);
+if (!isMainThread) {
+ await insertMultiple(db, first30000Events);
+ await insertMultiple(dbNoStemming, first30000Events);
+}
await cronometro({
'search "beauty", default settings': () => {
diff --git a/examples/with-react/index.html b/examples/with-react/index.html
index 31772ec0a..22b2dbc06 100644
--- a/examples/with-react/index.html
+++ b/examples/with-react/index.html
@@ -1,13 +1,13 @@
-
-
-
-
- Lyra + React
-
-
-
-
-
+
+
+
+
+ Lyra + React
+
+
+
+
+
diff --git a/examples/with-react/src/App.css b/examples/with-react/src/App.css
index cf3316cc0..12a9e1ae9 100644
--- a/examples/with-react/src/App.css
+++ b/examples/with-react/src/App.css
@@ -1,113 +1,113 @@
.main {
- width: 80vw;
- min-height: 100vh;
- display: flex;
- flex-direction: column;
- align-items: center;
- justify-content: center;
+ width: 80vw;
+ min-height: 100vh;
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+ justify-content: center;
}
.top {
- width: 100%;
- display: flex;
- align-items: center;
- justify-content: space-between;
- margin: 2rem 0;
+ width: 100%;
+ display: flex;
+ align-items: center;
+ justify-content: space-between;
+ margin: 2rem 0;
}
.title,
.movieTitle {
- width: 100%;
- text-align: left;
- font-size: 1.5rem;
- margin: 0;
+ width: 100%;
+ text-align: left;
+ font-size: 1.5rem;
+ margin: 0;
}
.movieTitle {
- font-size: 1.2rem;
- margin-bottom: 1rem;
+ font-size: 1.2rem;
+ margin-bottom: 1rem;
}
.input {
- outline: none;
- border: none;
- background-color: transparent;
- border: 1px solid #d5b3ffaa;
- border-radius: 1rem;
- padding: 0.5rem 1rem;
- width: 50%;
- font-size: 1rem;
- color: #eee;
- transition: 0.15s;
+ outline: none;
+ border: none;
+ background-color: transparent;
+ border: 1px solid #d5b3ffaa;
+ border-radius: 1rem;
+ padding: 0.5rem 1rem;
+ width: 50%;
+ font-size: 1rem;
+ color: #eee;
+ transition: 0.15s;
}
.input:focus {
- border-color: #d5b3ff;
+ border-color: #d5b3ff;
}
.button {
- outline: none;
- border: none;
- background-color: transparent;
- border: 1px solid #ef4444;
- border-radius: 0.5rem;
- padding: 0.5rem 1rem;
- width: fit-content;
- font-size: 1rem;
- color: #eee;
- cursor: pointer;
- transition: 0.15s;
+ outline: none;
+ border: none;
+ background-color: transparent;
+ border: 1px solid #ef4444;
+ border-radius: 0.5rem;
+ padding: 0.5rem 1rem;
+ width: fit-content;
+ font-size: 1rem;
+ color: #eee;
+ cursor: pointer;
+ transition: 0.15s;
}
.button:hover {
- color: #ef4444;
+ color: #ef4444;
}
.container {
- width: 100%;
- display: grid;
- grid-template-columns: repeat(2, minmax(0, 1fr));
- column-gap: 1rem;
- row-gap: 1rem;
+ width: 100%;
+ display: grid;
+ grid-template-columns: repeat(2, minmax(0, 1fr));
+ column-gap: 1rem;
+ row-gap: 1rem;
}
.movie {
- padding: 1rem;
- border-radius: 0.5rem;
- border: 2px solid #d5b3ffaa;
- cursor: pointer;
- transition: 0.15s;
+ padding: 1rem;
+ border-radius: 0.5rem;
+ border: 2px solid #d5b3ffaa;
+ cursor: pointer;
+ transition: 0.15s;
}
.movie:hover {
- border-color: #d5b3ff;
+ border-color: #d5b3ff;
}
.favLabel {
- width: 100%;
- text-align: right;
- margin: 0;
+ width: 100%;
+ text-align: right;
+ margin: 0;
}
.details {
- width: 100%;
- display: flex;
- flex-direction: column;
- align-items: flex-end;
- justify-content: center;
+ width: 100%;
+ display: flex;
+ flex-direction: column;
+ align-items: flex-end;
+ justify-content: center;
}
.sub {
- margin: 0;
- text-align: right;
+ margin: 0;
+ text-align: right;
}
@media (max-width: 700px) {
- .container {
- grid-template-columns: repeat(1, minmax(0, 1fr));
- }
+ .container {
+ grid-template-columns: repeat(1, minmax(0, 1fr));
+ }
- .input {
- width: 90%;
- }
+ .input {
+ width: 90%;
+ }
}
diff --git a/examples/with-react/src/App.tsx b/examples/with-react/src/App.tsx
index 1580c3c41..be87cff0a 100644
--- a/examples/with-react/src/App.tsx
+++ b/examples/with-react/src/App.tsx
@@ -1,112 +1,103 @@
+import { create, insertMultiple, search } from "@lyrasearch/lyra";
import { useEffect, useRef, useState } from "react";
-import { create, insertBatch, search } from "@lyrasearch/lyra";
-import data from "./assets/db.json";
import "./App.css";
+import data from "./assets/db.json";
interface Movies {
- id: string;
- title: string;
- director: string;
- plot: string;
- year: number;
- isFavorite: boolean;
+ id: string;
+ title: string;
+ director: string;
+ plot: string;
+ year: number;
+ isFavorite: boolean;
}
function App() {
- const searchInput = useRef(null);
- const [isSearching, setIsSearching] = useState(false);
- const [movies, setMovies] = useState();
+ const searchInput = useRef(null);
+ const [isSearching, setIsSearching] = useState(false);
+ const [movies, setMovies] = useState();
- useEffect(() => {
- // show all movies at the start
- setMovies(data);
- }, []);
+ useEffect(() => {
+ // show all movies at the start
+ setMovies(data);
+ }, []);
- const handleSearch = async (e: React.KeyboardEvent) => {
- if (e.key != "Enter") return;
+ const handleSearch = async (e: React.KeyboardEvent) => {
+ if (e.key != "Enter") return;
- const db = await create({
- schema: {
- id: "string", // users usually don't search this
- title: "string",
- director: "string",
- plot: "string", // users usually don't search this
- year: "number", // unsearchable
- isFavorite: "boolean", // unsearchable
- },
- });
+ const db = await create({
+ schema: {
+ id: "string", // users usually don't search this
+ title: "string",
+ director: "string",
+ plot: "string", // users usually don't search this
+ year: "number", // unsearchable
+ isFavorite: "boolean", // unsearchable
+ },
+ });
- await insertBatch(db, data, { batchSize: data.length });
+ await insertMultiple(db, data, { batchSize: data.length });
- const searchResult = await search(db, {
- term: searchInput.current!.value,
- properties: ["title", "director"],
- tolerance: 1, // for typo tolerance
- });
+ const searchResult = await search(db, {
+ term: searchInput.current!.value,
+ properties: ["title", "director"],
+ tolerance: 1, // for typo tolerance
+ });
- let result = [];
+ let result = [];
- for (let i = 0; i < searchResult.hits.length; i++) {
- result.push(searchResult.hits[i].document);
- }
+ for (let i = 0; i < searchResult.hits.length; i++) {
+ result.push(searchResult.hits[i].document);
+ }
- setIsSearching(true);
- setMovies(result);
- };
+ setIsSearching(true);
+ setMovies(result);
+ };
- return (
-
-
- {isSearching ? (
- <>
-
- Searching for "{searchInput.current!.value}"
-
-
- >
- ) : (
- <>
- All Movies
- handleSearch(e)}
- className="input"
- />
- >
- )}
-
-
- {movies?.length ? (
- <>
- {movies.map((movie) => (
-
- {movie.isFavorite && (
-
⭐
- )}
-
{movie.title}
-
{movie.plot}
-
- {movie.director}
- {movie.year}
-
-
- ))}
- >
- ) : (
-
No movies found...
- )}
-
-
- );
+ return (
+
+
+ {isSearching ? (
+ <>
+
Searching for "{searchInput.current!.value}"
+
+ >
+ ) : (
+ <>
+ All Movies
+ handleSearch(e)} className="input" />
+ >
+ )}
+
+
+ {movies?.length ? (
+ <>
+ {movies.map(movie => (
+
+ {movie.isFavorite &&
⭐
}
+
{movie.title}
+
{movie.plot}
+
+ {movie.director}
+ {movie.year}
+
+
+ ))}
+ >
+ ) : (
+
No movies found...
+ )}
+
+
+ );
}
export default App;
diff --git a/examples/with-react/src/assets/db.json b/examples/with-react/src/assets/db.json
index 0c16d2f9f..8335d3bc8 100644
--- a/examples/with-react/src/assets/db.json
+++ b/examples/with-react/src/assets/db.json
@@ -1,26 +1,26 @@
[
- {
- "id": "1",
- "title": "The Prestige",
- "director": "Christopher Nolan",
- "plot": "Two friends and fellow magicians become bitter enemies after a sudden tragedy. As they devote themselves to this rivalry, they make sacrifices that bring them fame but with terrible consequences.",
- "year": 2006,
- "isFavorite": true
- },
- {
- "id": "2",
- "title": "Big Fish",
- "director": "Tim Burton",
- "plot": "Will Bloom returns home to care for his dying father, who had a penchant for telling unbelievable stories. After he passes away, Will tries to find out if his tales were really true.",
- "year": 2004,
- "isFavorite": true
- },
- {
- "id": "3",
- "title": "Harry Potter and the Philosopher's Stone",
- "director": "Chris Columbus",
- "plot": "Harry Potter, an eleven-year-old orphan, discovers that he is a wizard and is invited to study at Hogwarts. Even as he escapes a dreary life and enters a world of magic, he finds trouble awaiting him.",
- "year": 2001,
- "isFavorite": false
- }
+ {
+ "id": "1",
+ "title": "The Prestige",
+ "director": "Christopher Nolan",
+ "plot": "Two friends and fellow magicians become bitter enemies after a sudden tragedy. As they devote themselves to this rivalry, they make sacrifices that bring them fame but with terrible consequences.",
+ "year": 2006,
+ "isFavorite": true
+ },
+ {
+ "id": "2",
+ "title": "Big Fish",
+ "director": "Tim Burton",
+ "plot": "Will Bloom returns home to care for his dying father, who had a penchant for telling unbelievable stories. After he passes away, Will tries to find out if his tales were really true.",
+ "year": 2004,
+ "isFavorite": true
+ },
+ {
+ "id": "3",
+ "title": "Harry Potter and the Philosopher's Stone",
+ "director": "Chris Columbus",
+ "plot": "Harry Potter, an eleven-year-old orphan, discovers that he is a wizard and is invited to study at Hogwarts. Even as he escapes a dreary life and enters a world of magic, he finds trouble awaiting him.",
+ "year": 2001,
+ "isFavorite": false
+ }
]
diff --git a/examples/with-react/src/index.css b/examples/with-react/src/index.css
index 8da03067f..063350e34 100644
--- a/examples/with-react/src/index.css
+++ b/examples/with-react/src/index.css
@@ -2,13 +2,13 @@
html,
body {
- padding: 0;
- margin: 0;
- font-family: "Inter", sans-serif;
- background-color: #1b1b1d;
- color: #eee;
- overflow-x: hidden;
- scroll-behavior: smooth;
+ padding: 0;
+ margin: 0;
+ font-family: "Inter", sans-serif;
+ background-color: #1b1b1d;
+ color: #eee;
+ overflow-x: hidden;
+ scroll-behavior: smooth;
}
h1,
@@ -18,25 +18,25 @@ h4,
h5,
h6,
p {
- margin: 0;
- margin-bottom: 0.5rem;
+ margin: 0;
+ margin-bottom: 0.5rem;
}
a {
- color: inherit;
- text-decoration: none;
+ color: inherit;
+ text-decoration: none;
}
* {
- box-sizing: border-box;
+ box-sizing: border-box;
}
#root {
- padding: 0;
- margin: 0;
- width: 100vw;
- min-height: 100vh;
- display: flex;
- align-items: center;
- justify-content: center;
+ padding: 0;
+ margin: 0;
+ width: 100vw;
+ min-height: 100vh;
+ display: flex;
+ align-items: center;
+ justify-content: center;
}
diff --git a/examples/with-react/src/main.tsx b/examples/with-react/src/main.tsx
index 7f8e89e6b..8b1ddb971 100644
--- a/examples/with-react/src/main.tsx
+++ b/examples/with-react/src/main.tsx
@@ -4,7 +4,7 @@ import App from "./App";
import "./index.css";
ReactDOM.createRoot(document.getElementById("root") as HTMLElement).render(
-
-
-
+
+
+ ,
);
diff --git a/examples/with-react/vite.config.ts b/examples/with-react/vite.config.ts
index 5a33944a9..9cc50ead1 100644
--- a/examples/with-react/vite.config.ts
+++ b/examples/with-react/vite.config.ts
@@ -1,7 +1,7 @@
-import { defineConfig } from 'vite'
-import react from '@vitejs/plugin-react'
+import { defineConfig } from "vite";
+import react from "@vitejs/plugin-react";
// https://vitejs.dev/config/
export default defineConfig({
plugins: [react()],
-})
+});
diff --git a/package.json b/package.json
index 834e69a17..a3c5fd331 100644
--- a/package.json
+++ b/package.json
@@ -61,7 +61,7 @@
"test:smoke": "tap --rcfile=tests/config/tap.yml tests/smoke/*.test.ts",
"ci": "npm run build && npm run test:ci && npm run test:smoke",
"prepare": "husky install && npm run build",
- "commit": "pnpm lint-staged && cz",
+ "commit": "pnpm run format && pnpm lint-staged && cz",
"benchmark": "node --no-warnings benchmarks/src/typo-tolerant-search.js"
},
"keywords": [
@@ -118,7 +118,8 @@
"tape": "^5.6.1",
"tcompare": "^6.0.0",
"tsx": "^3.12.1",
- "typescript": "^4.9.4"
+ "typescript": "^4.9.4",
+ "vite": "^4.1.4"
},
"pnpm": {
"peerDependencyRules": {
diff --git a/src/algorithms.ts b/src/algorithms.ts
deleted file mode 100644
index f3f8f3813..000000000
--- a/src/algorithms.ts
+++ /dev/null
@@ -1,93 +0,0 @@
-import type { BM25Params, TokenScore } from "./types/index.js";
-import * as ERRORS from "./errors.js";
-
-// Adapted from https://github.com/lovasoa/fast_array_intersect
-// MIT Licensed (https://github.com/lovasoa/fast_array_intersect/blob/master/LICENSE)
-// while on tag https://github.com/lovasoa/fast_array_intersect/tree/v1.1.0
-export function intersectTokenScores(arrays: TokenScore[][]): TokenScore[] {
- if (arrays.length === 0) {
- return [];
- }
-
- for (let i = 1; i < arrays.length; i++) {
- if (arrays[i].length < arrays[0].length) {
- const tmp = arrays[0];
- arrays[0] = arrays[i];
- arrays[i] = tmp;
- }
- }
-
- const set: Map = new Map();
- for (const elem of arrays[0]) {
- set.set(elem[0], [1, elem[1]]);
- }
-
- const arrLength = arrays.length;
- for (let i = 1; i < arrLength; i++) {
- let found = 0;
- for (const elem of arrays[i]) {
- /* c8 ignore next */
- const key = elem[0] ?? "";
-
- const [count, score] = set.get(key) ?? [0, 0];
- if (count === i) {
- set.set(key, [count + 1, score + elem[1]]);
- found++;
- }
- }
-
- if (found === 0) {
- return [];
- }
- }
-
- const result: TokenScore[] = [];
-
- for (const [token, [count, score]] of set) {
- if (count === arrLength) {
- result.push([token, score]);
- }
- }
-
- return result;
-}
-
-export function prioritizeTokenScores(arrays: TokenScore[][], boost: number): TokenScore[] {
- if (boost === 0) {
- throw new Error(ERRORS.INVALID_BOOST_VALUE());
- }
-
- const tokenMap: Record = {};
-
- const mapsLength = arrays.length;
- for (let i = 0; i < mapsLength; i++) {
- const arr = arrays[i];
-
- const entriesLength = arr.length;
- for (let j = 0; j < entriesLength; j++) {
- const [token, score] = arr[j];
- const boostScore = score * boost;
-
- if (token in tokenMap) {
- tokenMap[token] *= 1.5 + boostScore;
- } else {
- tokenMap[token] = boostScore;
- }
- }
- }
-
- return Object.entries(tokenMap).sort((a, b) => b[1] - a[1]);
-}
-
-export function BM25(
- tf: number,
- matchingCount: number,
- docsCount: number,
- fieldLength: number,
- averageFieldLength: number,
- BM25Params: BM25Params
-) {
- const { k, b, d } = BM25Params;
- const idf = Math.log(1 + (docsCount - matchingCount + 0.5) / (matchingCount + 0.5));
- return idf * (d + tf * (k + 1)) / (tf + k * (1 - b + b * fieldLength / averageFieldLength));
-}
\ No newline at end of file
diff --git a/src/cjs/index.cts b/src/cjs/index.cts
index 8af9d4cf1..e772bf90d 100644
--- a/src/cjs/index.cts
+++ b/src/cjs/index.cts
@@ -1,40 +1,47 @@
import type { create as esmCreate } from "../methods/create.js";
-import type {
- insert as esmInsert,
- insertBatch as esmInsertBatch,
- insertWithHooks as esmInsertWithHooks,
-} from "../methods/insert.js";
-import type { load as esmLoad } from "../methods/load.js";
-import type { remove as esmRemove } from "../methods/remove.js";
-import type { save as esmSave } from "../methods/save.js";
-import type { search as esmSearch } from "../methods/search.js";
import type { count as esmCount, getByID as esmGetByID } from "../methods/docs.js";
+import type { insert as esmInsert, insertMultiple as esminsertMultiple } from "../methods/insert.js";
+import type { remove as esmRemove, removeMultiple as esmRemoveMultiple } from "../methods/remove.js";
+import type { search as esmSearch } from "../methods/search.js";
+import type { load as esmLoad, save as esmSave } from "../methods/serialization.js";
+import type { update as esmUpdate, updateMultiple as esmUpdateMultiple } from "../methods/update.js";
export interface LyraExport {
+ count: typeof esmCount;
create: typeof esmCreate;
+ getByID: typeof esmGetByID;
insert: typeof esmInsert;
- insertWithHooks: typeof esmInsertWithHooks;
- insertBatch: typeof esmInsertBatch;
+ insertMultiple: typeof esminsertMultiple;
+ load: typeof esmLoad;
remove: typeof esmRemove;
- search: typeof esmSearch;
+ removeMultiple: typeof esmRemoveMultiple;
save: typeof esmSave;
- load: typeof esmLoad;
- count: typeof esmCount;
- getByID: typeof esmGetByID;
+ search: typeof esmSearch;
}
export type RequireCallback = (err: Error | undefined, lyra?: LyraExport) => void;
+let _esmCount: typeof esmCount;
let _esmCreate: typeof esmCreate;
+let _esmGetByID: typeof esmGetByID;
let _esmInsert: typeof esmInsert;
-let _esmInsertWithHooks: typeof esmInsertWithHooks;
-let _esmInsertBatch: typeof esmInsertBatch;
+let _esmInsertMultiple: typeof esminsertMultiple;
+let _esmLoad: typeof esmLoad;
let _esmRemove: typeof esmRemove;
-let _esmSearch: typeof esmSearch;
+let _esmRemoveMultiple: typeof esmRemoveMultiple;
let _esmSave: typeof esmSave;
-let _esmLoad: typeof esmLoad;
-let _esmCount: typeof esmCount;
-let _esmGetByID: typeof esmGetByID;
+let _esmSearch: typeof esmSearch;
+let _esmUpdate: typeof esmUpdate;
+let _esmUpdateMultiple: typeof esmUpdateMultiple;
+
+export async function count(...args: Parameters): ReturnType {
+ if (!_esmCount) {
+ const imported = await import("../methods/docs.js");
+ _esmCount = imported.count;
+ }
+
+ return _esmCount(...args);
+}
export async function create(...args: Parameters): ReturnType {
if (!_esmCreate) {
@@ -45,6 +52,15 @@ export async function create(...args: Parameters): ReturnType<
return _esmCreate(...args);
}
+export async function getByID(...args: Parameters): ReturnType {
+ if (!_esmGetByID) {
+ const imported = await import("../methods/docs.js");
+ _esmGetByID = imported.getByID;
+ }
+
+ return _esmGetByID(...args);
+}
+
export async function insert(...args: Parameters): ReturnType {
if (!_esmInsert) {
const imported = await import("../methods/insert.js");
@@ -54,24 +70,24 @@ export async function insert(...args: Parameters): ReturnType<
return _esmInsert(...args);
}
-export async function insertWithHooks(
- ...args: Parameters
-): ReturnType {
- if (!_esmInsertWithHooks) {
+export async function insertMultiple(
+ ...args: Parameters
+): ReturnType {
+ if (!_esmInsertMultiple) {
const imported = await import("../methods/insert.js");
- _esmInsertWithHooks = imported.insertWithHooks;
+ _esmInsertMultiple = imported.insertMultiple;
}
- return _esmInsertWithHooks(...args);
+ return _esmInsertMultiple(...args);
}
-export async function insertBatch(...args: Parameters): ReturnType {
- if (!_esmInsertBatch) {
- const imported = await import("../methods/insert.js");
- _esmInsertBatch = imported.insertBatch;
+export async function load(...args: Parameters): ReturnType {
+ if (!_esmLoad) {
+ const imported = await import("../methods/serialization.js");
+ _esmLoad = imported.load;
}
- return _esmInsertBatch(...args);
+ return _esmLoad(...args);
}
export async function remove(...args: Parameters): ReturnType {
@@ -83,49 +99,53 @@ export async function remove(...args: Parameters): ReturnType<
return _esmRemove(...args);
}
-export async function search(...args: Parameters): ReturnType {
- if (!_esmSearch) {
- const imported = await import("../methods/search.js");
- _esmSearch = imported.search;
+export async function removeMultiple(
+ ...args: Parameters
+): ReturnType {
+ if (!_esmRemoveMultiple) {
+ const imported = await import("../methods/remove.js");
+ _esmRemoveMultiple = imported.removeMultiple;
}
- return _esmSearch(...args);
+ return _esmRemoveMultiple(...args);
}
export async function save(...args: Parameters): ReturnType {
if (!_esmSave) {
- const imported = await import("../methods/save.js");
+ const imported = await import("../methods/serialization.js");
_esmSave = imported.save;
}
return _esmSave(...args);
}
-export async function load(...args: Parameters): ReturnType {
- if (!_esmLoad) {
- const imported = await import("../methods/load.js");
- _esmLoad = imported.load;
+export async function search(...args: Parameters): ReturnType {
+ if (!_esmSearch) {
+ const imported = await import("../methods/search.js");
+ _esmSearch = imported.search;
}
- return _esmLoad(...args);
+ return _esmSearch(...args);
}
-export async function count(...args: Parameters): ReturnType {
- if (!_esmCount) {
- const imported = await import("../methods/docs.js");
- _esmCount = imported.count;
+export async function update(...args: Parameters): ReturnType {
+ if (!_esmUpdate) {
+ const imported = await import("../methods/update.js");
+ _esmUpdate = imported.update;
}
- return _esmCount(...args);
+ return _esmUpdate(...args);
}
-export async function getByID(...args: Parameters): ReturnType {
- if (!_esmGetByID) {
- const imported = await import("../methods/docs.js");
- _esmGetByID = imported.getByID;
+export async function updateMultiple(
+ ...args: Parameters
+): ReturnType {
+ if (!_esmUpdateMultiple) {
+ const imported = await import("../methods/update.js");
+ _esmUpdateMultiple = imported.updateMultiple;
}
- return _esmGetByID(...args);
+ return _esmUpdateMultiple(...args);
}
export function requireLyra(callback: RequireCallback): void {
diff --git a/src/cjs/internals.cts b/src/cjs/internals.cts
index 273a5c0c1..339854207 100644
--- a/src/cjs/internals.cts
+++ b/src/cjs/internals.cts
@@ -1,4 +1,4 @@
-import type { BoundedMetric } from "../levenshtein.js";
+import type { BoundedMetric } from "../components/levenshtein.js";
export interface LyraInternals {
boundedLevenshtein(a: string, b: string, tolerance: number): BoundedMetric;
diff --git a/src/components/algorithms.ts b/src/components/algorithms.ts
new file mode 100644
index 000000000..bfd2cf1c5
--- /dev/null
+++ b/src/components/algorithms.ts
@@ -0,0 +1,42 @@
+import { createError } from "../errors.js";
+import { TokenScore, BM25Params } from "../types.js";
+
+export function prioritizeTokenScores(arrays: TokenScore[][], boost: number): TokenScore[] {
+ if (boost === 0) {
+ throw createError("INVALID_BOOST_VALUE");
+ }
+
+ const tokenMap: Record = {};
+
+ const mapsLength = arrays.length;
+ for (let i = 0; i < mapsLength; i++) {
+ const arr = arrays[i];
+
+ const entriesLength = arr.length;
+ for (let j = 0; j < entriesLength; j++) {
+ const [token, score] = arr[j];
+ const boostScore = score * boost;
+
+ if (token in tokenMap) {
+ tokenMap[token] *= 1.5 + boostScore;
+ } else {
+ tokenMap[token] = boostScore;
+ }
+ }
+ }
+
+ return Object.entries(tokenMap).sort((a, b) => b[1] - a[1]);
+}
+
+export function BM25(
+ tf: number,
+ matchingCount: number,
+ docsCount: number,
+ fieldLength: number,
+ averageFieldLength: number,
+ BM25Params: Required,
+) {
+ const { k, b, d } = BM25Params;
+ const idf = Math.log(1 + (docsCount - matchingCount + 0.5) / (matchingCount + 0.5));
+ return (idf * (d + tf * (k + 1))) / (tf + k * (1 - b + (b * fieldLength) / averageFieldLength));
+}
diff --git a/src/components/defaults.ts b/src/components/defaults.ts
new file mode 100644
index 000000000..a9f5ac4a5
--- /dev/null
+++ b/src/components/defaults.ts
@@ -0,0 +1,44 @@
+import { createError } from "../errors.js";
+import { Document, Schema, SimpleComponents } from "../types.js";
+import { getDocumentProperties, uniqueId } from "../utils.js";
+
+function validateSchema(doc: Document, schema: S): boolean {
+ for (const [prop, type] of Object.entries(schema)) {
+ if (typeof type === "object") {
+ if (!doc[prop] || (typeof doc[prop] !== "object" && Array.isArray(doc[prop]))) {
+ return false;
+ }
+
+ if (!validateSchema(doc[prop] as Document, type)) {
+ return false;
+ }
+ }
+
+ if (typeof doc[prop] !== type) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+export function getDefaultComponents(): SimpleComponents {
+ return {
+ validateSchema,
+ getDocumentIndexId(doc: Document): string {
+ if (doc.id) {
+ if (typeof doc.id !== "string") {
+ throw createError("DOCUMENT_ID_MUST_BE_STRING", typeof doc.id);
+ }
+
+ return doc.id;
+ }
+
+ return uniqueId();
+ },
+ getDocumentProperties,
+ formatElapsedTime(n: bigint): bigint {
+ return n;
+ },
+ };
+}
diff --git a/src/components/documents-store.ts b/src/components/documents-store.ts
new file mode 100644
index 000000000..8de8ab6ff
--- /dev/null
+++ b/src/components/documents-store.ts
@@ -0,0 +1,84 @@
+import { Document, IDocumentsStore, OpaqueDocumentStore, OpaqueIndex, Schema } from "../types.js";
+
+export interface DocumentsStore extends OpaqueDocumentStore {
+ docs: Record;
+ count: number;
+}
+
+type DefaultDocumentsStore = IDocumentsStore;
+
+function create(): DocumentsStore {
+ return {
+ docs: {},
+ count: 0,
+ };
+}
+
+function get(store: DocumentsStore, id: string): Document | undefined {
+ return store.docs[id];
+}
+
+function getMultiple(store: DocumentsStore, ids: string[]): (Document | undefined)[] {
+ const found: (Document | undefined)[] = Array.from({ length: ids.length });
+
+ for (let i = 0; i < ids.length; i++) {
+ found[i] = store.docs[ids[i]];
+ }
+
+ return found;
+}
+
+function store(store: DocumentsStore, id: string, doc: Document): boolean {
+ if (typeof store.docs[id] !== "undefined") {
+ return false;
+ }
+
+ store.docs[id] = doc;
+ store.count++;
+
+ return true;
+}
+
+function remove(store: DocumentsStore, id: string): boolean {
+ if (typeof store.docs[id] === "undefined") {
+ return false;
+ }
+
+ store.docs[id] = undefined;
+ store.count--;
+
+ return true;
+}
+
+function count(store: DocumentsStore): number {
+ return store.count;
+}
+
+function load(raw: R): DocumentsStore {
+ const rawDocument = raw as DocumentsStore;
+
+ return {
+ docs: rawDocument.docs,
+ count: rawDocument.count,
+ };
+}
+
+function save(docs: DocumentsStore): R {
+ return {
+ docs: docs.docs,
+ count: docs.count,
+ } as R;
+}
+
+export function createDocumentsStore(): DefaultDocumentsStore {
+ return {
+ create,
+ get,
+ getMultiple,
+ store,
+ remove,
+ count,
+ load,
+ save,
+ };
+}
diff --git a/src/components/facets.ts b/src/components/facets.ts
new file mode 100644
index 000000000..4a996fe8d
--- /dev/null
+++ b/src/components/facets.ts
@@ -0,0 +1,98 @@
+import type {
+ FacetResult,
+ FacetSorting,
+ Lyra,
+ NumberFacetDefinition,
+ OpaqueDocumentStore,
+ OpaqueIndex,
+ Schema,
+ SearchParams,
+ StringFacetDefinition,
+ TokenScore,
+} from "../types.js";
+import { getNested } from "../utils.js";
+
+function sortingPredicate(order: FacetSorting = "desc", a: [string, number], b: [string, number]) {
+ if (order.toLowerCase() === "asc") {
+ return a[1] - b[1];
+ } else {
+ return b[1] - a[1];
+ }
+}
+
+export async function getFacets(
+ lyra: Lyra,
+ results: TokenScore[],
+ facetsConfig: Required["facets"],
+): Promise {
+ const facets: FacetResult = {};
+ const allIDs = results.map(([id]) => id);
+ const allDocs = await lyra.documentsStore.getMultiple(lyra.data.docs, allIDs);
+ const facetKeys = Object.keys(facetsConfig!);
+
+ const properties = await lyra.index.getSearchablePropertiesWithTypes(lyra.data.index);
+
+ for (const facet of facetKeys) {
+ let values = {};
+
+ // Hack to guarantee the same order of ranges as specified by the user
+ // TODO: Revisit this once components land
+ if (properties[facet] === "number") {
+ const { ranges } = facetsConfig[facet] as NumberFacetDefinition;
+ const tmp = [];
+ for (const range of ranges) {
+ tmp.push([`${range.from}-${range.to}`, 0]);
+ }
+ values = Object.fromEntries(tmp);
+ }
+
+ facets[facet] = {
+ count: 0,
+ values,
+ };
+ }
+
+ const allDocsLength = allDocs.length;
+ for (let i = 0; i < allDocsLength; i++) {
+ const doc = allDocs[i];
+
+ for (const facet of facetKeys) {
+ const facetValue = facet.includes(".") ? getNested(doc!, facet)! : (doc![facet] as number | boolean);
+
+ // Range facets based on numbers
+ if (properties[facet] === "number") {
+ for (const range of (facetsConfig[facet] as NumberFacetDefinition).ranges) {
+ if (facetValue >= range.from && facetValue <= range.to) {
+ if (facets[facet].values[`${range.from}-${range.to}`] === undefined) {
+ facets[facet].values[`${range.from}-${range.to}`] = 1;
+ } else {
+ facets[facet].values[`${range.from}-${range.to}`]++;
+ }
+ }
+ }
+ } else {
+ // String or boolean based facets
+ const value = facetValue.toString();
+ facets[facet].values[value] = (facets[facet].values[value] ?? 0) + 1;
+ }
+ }
+ }
+
+ for (const facet of facetKeys) {
+ // Count the number of values for each facet
+ facets[facet].count = Object.keys(facets[facet].values).length;
+
+ // Sort only string-based facets
+ if (properties[facet] === "string") {
+ const stringFacetDefinition = facetsConfig as StringFacetDefinition;
+
+ facets[facet].values = Object.fromEntries(
+ Object.entries(facets[facet].values)
+ .sort((a, b) => sortingPredicate(stringFacetDefinition.sort, a, b))
+ .slice(stringFacetDefinition.offset ?? 0, stringFacetDefinition.limit ?? 10),
+ );
+ }
+ }
+
+ return facets;
+}
diff --git a/src/components/filters.ts b/src/components/filters.ts
new file mode 100644
index 000000000..2faa8d4a8
--- /dev/null
+++ b/src/components/filters.ts
@@ -0,0 +1,17 @@
+export function intersectFilteredIDs(filtered: string[], lookedUp: [string, number][]): [string, number][] {
+ const map = new Map();
+ const result: [string, number][] = [];
+
+ for (const id of filtered) {
+ map.set(id, true);
+ }
+
+ for (const [id, score] of lookedUp) {
+ if (map.has(id)) {
+ result.push([id, score]);
+ map.delete(id);
+ }
+ }
+
+ return result;
+}
diff --git a/src/components/hooks.ts b/src/components/hooks.ts
new file mode 100644
index 000000000..15ddaa2e5
--- /dev/null
+++ b/src/components/hooks.ts
@@ -0,0 +1,49 @@
+import {
+ Document,
+ Lyra,
+ MultipleCallbackComponent,
+ OpaqueDocumentStore,
+ OpaqueIndex,
+ Schema,
+ SingleCallbackComponent,
+} from "../types.js";
+
+export const COMPLEX_COMPONENTS = ["tokenizer", "index", "documentsStore"];
+
+export const SIMPLE_COMPONENTS = ["validateSchema", "getDocumentIndexId", "getDocumentProperties", "formatElapsedTime"];
+
+export const SIMPLE_OR_ARRAY_COMPONENTS = [
+ "beforeInsert",
+ "afterInsert",
+ "beforeRemove",
+ "afterRemove",
+ "beforeUpdate",
+ "afterUpdate",
+ "beforeMultipleInsert",
+ "afterMultipleInsert",
+ "beforeMultipleRemove",
+ "afterMultipleRemove",
+ "beforeMultipleUpdate",
+ "afterMultipleUpdate",
+];
+
+export async function runSingleHook(
+ hooks: SingleCallbackComponent[],
+ lyra: Lyra,
+ id: string,
+ doc?: Document,
+): Promise {
+ for (let i = 0; i < hooks.length; i++) {
+ await hooks[i](lyra, id, doc);
+ }
+}
+
+export async function runMultipleHook(
+ hooks: MultipleCallbackComponent[],
+ lyra: Lyra,
+ docsOrIds: Document[] | string[],
+): Promise {
+ for (let i = 0; i < hooks.length; i++) {
+ await hooks[i](lyra, docsOrIds);
+ }
+}
diff --git a/src/components/index.ts b/src/components/index.ts
new file mode 100644
index 000000000..3a8f5878d
--- /dev/null
+++ b/src/components/index.ts
@@ -0,0 +1,390 @@
+import { createError } from "../errors.js";
+import {
+ create as avlCreate,
+ find as avlFind,
+ greaterThan as avlGreaterThan,
+ insert as avlInsert,
+ lessThan as avlLessThan,
+ Node as AVLNode,
+ rangeSearch as avlRangeSearch,
+ removeDocument as avlRemoveDocument,
+} from "../trees/avl.js";
+import {
+ create as radixCreate,
+ find as radixFind,
+ insert as radixInsert,
+ Node as RadixNode,
+ removeDocumentByWord as radixRemoveDocument,
+} from "../trees/radix.js";
+import {
+ BM25Params,
+ ComparisonOperator,
+ IIndex,
+ Lyra,
+ OpaqueDocumentStore,
+ OpaqueIndex,
+ Schema,
+ SearchableType,
+ SearchableValue,
+ SearchContext,
+ Tokenizer,
+ TokenScore,
+} from "../types.js";
+import { intersect } from "../utils.js";
+import { BM25 } from "./algorithms.js";
+
+type FrequencyMap = {
+ [property: string]: {
+ [documentID: string]:
+ | {
+ [token: string]: number;
+ }
+ | undefined;
+ };
+};
+
+type BooleanIndex = {
+ true: string[];
+ false: string[];
+};
+
+export interface Index extends OpaqueIndex {
+ indexes: Record | BooleanIndex>;
+ searchableProperties: string[];
+ searchablePropertiesWithTypes: Record;
+ frequencies: FrequencyMap;
+ tokenOccurrencies: Record>;
+ avgFieldLength: Record;
+ fieldLengths: Record>;
+}
+
+type DefaultIndex = IIndex;
+
+function create(
+ lyra: Lyra,
+ schema: Schema,
+ index?: Index,
+ prefix = "",
+): Index {
+ if (!index) {
+ index = {
+ indexes: {},
+ searchableProperties: [],
+ searchablePropertiesWithTypes: {},
+ frequencies: {},
+ tokenOccurrencies: {},
+ avgFieldLength: {},
+ fieldLengths: {},
+ };
+ }
+
+ for (const [prop, type] of Object.entries(schema)) {
+ const typeActualType = typeof type;
+ const path = `${prefix}${prefix ? "." : ""}${prop}`;
+
+ if (typeActualType === "object" && !Array.isArray(type)) {
+ // Nested
+ create(lyra, type as Schema, index, path);
+ continue;
+ }
+
+ switch (type) {
+ case "boolean":
+ index.indexes[path] = { true: [], false: [] };
+ break;
+ case "number":
+ index.indexes[path] = avlCreate(0, []);
+ break;
+ case "string":
+ index.indexes[path] = radixCreate();
+ index.avgFieldLength[path] = 0;
+ index.frequencies[path] = {};
+ index.tokenOccurrencies[path] = {};
+ index.fieldLengths[path] = {};
+
+ break;
+ default:
+ throw createError("INVALID_SCHEMA_TYPE", Array.isArray(type) ? "array" : typeActualType);
+ }
+
+ index.searchableProperties.push(path);
+ index.searchablePropertiesWithTypes[path] = type;
+ }
+
+ return index;
+}
+
+function insert(
+ index: Index,
+ prop: string,
+ id: string,
+ value: SearchableValue,
+ language: string | undefined,
+ tokenizer: Tokenizer,
+ docsCount: number,
+): void {
+ if (typeof value === "number") {
+ avlInsert(index.indexes[prop] as AVLNode, value as number, [id]);
+ return;
+ } else if (typeof value === "boolean") {
+ (index.indexes[prop] as BooleanIndex)[value ? "true" : "false"].push(id);
+ return;
+ }
+
+ const tokens = tokenizer.tokenize(value as string, language);
+
+ if (!(id in index.frequencies[prop])) {
+ index.frequencies[prop][id] = {};
+ }
+
+ index.fieldLengths[prop][id] = tokens.length;
+ index.avgFieldLength[prop] = ((index.avgFieldLength[prop] ?? 0) * (docsCount - 1) + tokens.length) / docsCount;
+
+ for (const token of tokens) {
+ let tokenFrequency = 0;
+
+ for (const t of tokens) {
+ if (t === token) {
+ tokenFrequency++;
+ }
+ }
+
+ const tf = tokenFrequency / tokens.length;
+
+ index.frequencies[prop][id]![token] = tf;
+
+ if (!(token in index.tokenOccurrencies[prop])) {
+ index.tokenOccurrencies[prop][token] = 0;
+ }
+
+ // increase a token counter that may not yet exist
+ index.tokenOccurrencies[prop][token] = (index.tokenOccurrencies[prop][token] ?? 0) + 1;
+
+ radixInsert(index.indexes[prop] as RadixNode, token, id);
+ }
+}
+
+function remove(
+ index: Index,
+ prop: string,
+ id: string,
+ value: SearchableValue,
+ language: string | undefined,
+ tokenizer: Tokenizer,
+ docsCount: number,
+): boolean {
+ if (typeof value === "number") {
+ avlRemoveDocument(index.indexes[prop] as AVLNode, id, value);
+ return true;
+ } else if (typeof value === "boolean") {
+ const booleanKey = value ? "true" : "false";
+ const position = (index.indexes[prop] as BooleanIndex)[booleanKey].indexOf(id);
+
+ (index.indexes[prop] as BooleanIndex)[value ? "true" : "false"].splice(position, 1);
+ return true;
+ }
+
+ const tokens = tokenizer.tokenize(value as string, language);
+
+ index.avgFieldLength[prop] =
+ (index.avgFieldLength[prop] * docsCount - index.fieldLengths[prop][id]!) / (docsCount - 1);
+ index.fieldLengths[prop][id] = undefined;
+ index.frequencies[prop][id] = undefined;
+
+ for (const token of tokens) {
+ index.tokenOccurrencies[prop][token]--;
+ radixRemoveDocument(index.indexes[prop] as RadixNode, token, id);
+ }
+
+ return true;
+}
+
+function search(index: Index, prop: string, term: string, context: SearchContext): TokenScore[] {
+ if (!(prop in index.tokenOccurrencies)) {
+ return [];
+ }
+
+ // Exact fields for TF-IDF
+ const avgFieldLength = index.avgFieldLength[prop];
+ const fieldLengths = index.fieldLengths[prop];
+ const lyraOccurrencies = index.tokenOccurrencies[prop];
+ const lyraFrequencies = index.frequencies[prop];
+
+ // Perform the search
+ const rootNode = index.indexes[prop] as RadixNode;
+ const { exact, tolerance } = context.params;
+ const searchResult = radixFind(rootNode, { term, exact, tolerance });
+
+ const ids = new Set();
+
+ for (const key in searchResult) {
+ for (const id of searchResult[key]) {
+ ids.add(id);
+ }
+ }
+
+ const documentIDs = Array.from(ids);
+
+ // lyraOccurrencies[term] can be undefined, 0, string, or { [k: string]: number }
+ const termOccurrencies = typeof lyraOccurrencies[term] === "number" ? lyraOccurrencies[term] ?? 0 : 0;
+
+ const scoreList: TokenScore[] = [];
+
+ // Calculate TF-IDF value for each term, in each document, for each index.
+ const documentIDsLength = documentIDs.length;
+ for (let k = 0; k < documentIDsLength; k++) {
+ const id = documentIDs[k];
+ const tf = lyraFrequencies?.[id]?.[term] ?? 0;
+
+ const bm25 = BM25(
+ tf,
+ termOccurrencies,
+ context.docsCount,
+ fieldLengths[id]!,
+ avgFieldLength,
+ context.params.relevance! as Required,
+ );
+
+ scoreList.push([id, bm25]);
+ }
+
+ return scoreList;
+}
+
+function searchByWhereClause(index: Index, filters: Record): string[] {
+ const filterKeys = Object.keys(filters);
+
+ const filtersMap: Record = filterKeys.reduce(
+ (acc, key) => ({
+ [key]: [],
+ ...acc,
+ }),
+ {},
+ );
+
+ for (const param of filterKeys) {
+ const operation = filters[param];
+
+ if (typeof operation === "boolean") {
+ const idx = index.indexes[param] as BooleanIndex;
+ const filteredIDs = idx[operation.toString() as keyof BooleanIndex];
+ filtersMap[param].push(...filteredIDs);
+ continue;
+ }
+
+ const operationKeys = Object.keys(operation);
+
+ if (operationKeys.length > 1) {
+ throw createError("INVALID_FILTER_OPERATION", operationKeys.length);
+ }
+
+ const operationOpt = operationKeys[0] as ComparisonOperator;
+ const operationValue = operation[operationOpt as unknown as keyof ComparisonOperator];
+
+ const AVLNode = index.indexes[param] as AVLNode;
+
+ switch (operationOpt) {
+ case "gt": {
+ const filteredIDs = avlGreaterThan(AVLNode, operationValue, false);
+ filtersMap[param].push(...filteredIDs);
+ break;
+ }
+ case "gte": {
+ const filteredIDs = avlGreaterThan(AVLNode, operationValue, true);
+ filtersMap[param].push(...filteredIDs);
+ break;
+ }
+ case "lt": {
+ const filteredIDs = avlLessThan(AVLNode, operationValue, false);
+ filtersMap[param].push(...filteredIDs);
+ break;
+ }
+ case "lte": {
+ const filteredIDs = avlLessThan(AVLNode, operationValue, true);
+ filtersMap[param].push(...filteredIDs);
+ break;
+ }
+ case "eq": {
+ const filteredIDs = avlFind(AVLNode, operationValue) ?? [];
+ filtersMap[param].push(...filteredIDs);
+ break;
+ }
+ case "between": {
+ const [min, max] = operationValue as number[];
+ const filteredIDs = avlRangeSearch(AVLNode, min, max);
+ filtersMap[param].push(...filteredIDs);
+ }
+ }
+ }
+
+ // AND operation: calculate the intersection between all the IDs in filtersMap
+ const result = intersect(Object.values(filtersMap)) as unknown as string[];
+
+ return result;
+}
+
+function getSearchableProperties(index: Index): string[] {
+ return index.searchableProperties;
+}
+
+function getSearchablePropertiesWithTypes(index: Index): Record {
+ return index.searchablePropertiesWithTypes;
+}
+
+function load(raw: R): Index {
+ const {
+ indexes,
+ searchableProperties,
+ searchablePropertiesWithTypes,
+ frequencies,
+ tokenOccurrencies,
+ avgFieldLength,
+ fieldLengths,
+ } = raw as Index;
+
+ return {
+ indexes,
+ searchableProperties,
+ searchablePropertiesWithTypes,
+ frequencies,
+ tokenOccurrencies,
+ avgFieldLength,
+ fieldLengths,
+ };
+}
+
+function save(index: Index): R {
+ const {
+ indexes,
+ searchableProperties,
+ searchablePropertiesWithTypes,
+ frequencies,
+ tokenOccurrencies,
+ avgFieldLength,
+ fieldLengths,
+ } = index;
+
+ return {
+ indexes,
+ searchableProperties,
+ searchablePropertiesWithTypes,
+ frequencies,
+ tokenOccurrencies,
+ avgFieldLength,
+ fieldLengths,
+ } as R;
+}
+
+export function createIndex(): DefaultIndex {
+ return {
+ create,
+ insert,
+ remove,
+ search,
+ searchByWhereClause,
+ getSearchableProperties,
+ getSearchablePropertiesWithTypes,
+ load,
+ save,
+ };
+}
diff --git a/src/levenshtein.ts b/src/components/levenshtein.ts
similarity index 100%
rename from src/levenshtein.ts
rename to src/components/levenshtein.ts
index 81751151e..78d669410 100644
--- a/src/levenshtein.ts
+++ b/src/components/levenshtein.ts
@@ -3,20 +3,6 @@ export type BoundedMetric = {
distance: number;
};
-/**
- * Computes the Levenshtein distance between two strings (a, b), returning early with -1 if the distance
- * is greater than the given tolerance.
- * It assumes that:
- * - tolerance >= ||a| - |b|| >= 0
- */
-export function boundedLevenshtein(a: string, b: string, tolerance: number): BoundedMetric {
- const distance = _boundedLevenshtein(a, b, tolerance);
- return {
- distance,
- isBounded: distance >= 0,
- };
-}
-
/**
* Inspired by:
* https://github.com/Yomguithereal/talisman/blob/86ae55cbd040ff021d05e282e0e6c71f2dde21f8/src/metrics/levenshtein.js#L218-L340
@@ -136,6 +122,20 @@ function _boundedLevenshtein(a: string, b: string, tolerance: number): number {
return current <= tolerance ? current : -1;
}
+/**
+ * Computes the Levenshtein distance between two strings (a, b), returning early with -1 if the distance
+ * is greater than the given tolerance.
+ * It assumes that:
+ * - tolerance >= ||a| - |b|| >= 0
+ */
+export function boundedLevenshtein(a: string, b: string, tolerance: number): BoundedMetric {
+ const distance = _boundedLevenshtein(a, b, tolerance);
+ return {
+ distance,
+ isBounded: distance >= 0,
+ };
+}
+
export function levenshtein(a: string, b: string): number {
/* c8 ignore next 3 */
if (!a.length) {
diff --git a/src/components/sync-blocking-checker.ts b/src/components/sync-blocking-checker.ts
new file mode 100644
index 000000000..9d4bb70ad
--- /dev/null
+++ b/src/components/sync-blocking-checker.ts
@@ -0,0 +1,54 @@
+import { kInsertions, kRemovals, Lyra, OpaqueDocumentStore, OpaqueIndex, Schema } from "../types.js";
+
+// Web platforms don't have process. React-Native doesn't have process.emitWarning.
+const warn =
+ globalThis.process?.emitWarning ??
+ function emitWarning(message: string, options: { code: string }) {
+ console.warn(`[WARNING] [${options.code}] ${message}`);
+ };
+
+export function trackInsertion(
+ lyra: Lyra,
+): void {
+ if (typeof lyra[kInsertions] !== "number") {
+ queueMicrotask(() => {
+ lyra[kInsertions] = undefined;
+ });
+
+ lyra[kInsertions] = 0;
+ }
+
+ if (lyra[kInsertions]! > 1000) {
+ warn(
+ "Lyra's insert operation is synchronous. Please avoid inserting a large number of documents in a single operation in order not to block the main thread or, in alternative, please use insertMultiple.",
+ { code: "LYRA0001" },
+ );
+
+ lyra[kInsertions] = -1;
+ } else if (lyra[kInsertions] >= 0) {
+ lyra[kInsertions]++;
+ }
+}
+
+export function trackRemoval(
+ lyra: Lyra,
+): void {
+ if (typeof lyra[kRemovals] !== "number") {
+ queueMicrotask(() => {
+ lyra[kRemovals] = undefined;
+ });
+
+ lyra[kRemovals] = 0;
+ }
+
+ if (lyra[kRemovals]! > 1000) {
+ warn(
+ "Lyra's remove operation is synchronous. Please avoid removing a large number of documents in a single operation in order not to block the main thread or, in alternative, please use removeMultiple.",
+ { code: "LYRA0002" },
+ );
+
+ lyra[kRemovals] = -1;
+ } else if (lyra[kRemovals] >= 0) {
+ lyra[kRemovals]++;
+ }
+}
diff --git a/src/errors.ts b/src/errors.ts
index 30f8d4f6b..49ba13a8d 100644
--- a/src/errors.ts
+++ b/src/errors.ts
@@ -1,85 +1,39 @@
import { SUPPORTED_LANGUAGES } from "./tokenizer/languages.js";
-
-function formatJSON(input: object) {
- return JSON.stringify(input, null, 2);
-}
-
-export function INVALID_SCHEMA_TYPE(type: string): string {
- return `Invalid schema type. Expected string or object, but got ${type}`;
-}
-
-export function INVALID_DOC_SCHEMA(expected: object, found: object): string {
- return `Invalid document structure. \nLyra has been initialized with the following schema: \n\n${formatJSON(
- expected,
- )}\n\nbut found the following doc:\n\n${formatJSON(found)}`;
-}
-
-export function INVALID_PROPERTY(name: string, expected: string[]): string {
- return `Invalid property name. Expected a wildcard string ("*") or array containing one of the following properties: ${expected.join(
- ", ",
- )}, but got: ${name}`;
-}
-
-export function CANT_DELETE_DOC_NOT_FOUND(id: string): string {
- return `Document with ID ${id} does not exist.`;
-}
-
-export function CANT_DELETE_DOCUMENT(docID: string, key: string, token: string): string {
- return `Unable to delete document "${docID}" from index "${key}" on word "${token}".`;
-}
-
-export function UNSUPPORTED_NESTED_PROPERTIES(): string {
- return `Nested properties are not supported in this Lyra version, but will be in the future.`;
-}
-
-export function DOC_ID_DOES_NOT_EXISTS(id: string): string {
- return `Document with ID ${id} does not exists`;
-}
-
-export function GETTER_SETTER_WORKS_ON_EDGE_ONLY(method: string): string {
- return `${method} works on edge only. Use edge: true in Lyra constructor to enable it.`;
-}
-
-export function INVALID_HOOKS_OBJECT(): string {
- return "Invalid hooks object";
+import { sprintf } from "./utils.js";
+
+const allLanguages = SUPPORTED_LANGUAGES.join("\n - ");
+
+const errors = {
+ NO_LANGUAGE_WITH_CUSTOM_TOKENIZER: "Do not pass the language option to create when using a custom tokenizer.",
+ LANGUAGE_NOT_SUPPORTED: `Language "%s" is not supported.\nSupported languages are:\n - ${allLanguages}`,
+ INVALID_STEMMER_FUNCTION_TYPE: `config.stemmer property must be a function.`,
+ CUSTOM_STOP_WORDS_MUST_BE_FUNCTION_OR_ARRAY: "Custom stop words array must only contain strings.",
+ UNSUPPORTED_COMPONENT: `Unsupported component "%s".`,
+ COMPONENT_MUST_BE_FUNCTION: `The component "%s" must be a function.`,
+ COMPONENT_MUST_BE_FUNCTION_OR_ARRAY_FUNCTIONS: `The component "%s" must be a function or an array of functions.`,
+ INVALID_SCHEMA_TYPE: `Unsupported schema type "%s". Expected "string", "boolean" or "number".`,
+ DOCUMENT_ID_MUST_BE_STRING: `Document id must be of type "string". Got "%s" instead.`,
+ DOCUMENT_ALREADY_EXISTS: `A document with id "%s" already exists.`,
+ DOCUMENT_DOES_NOT_EXIST: `A document with id "%s" does not exist.`,
+ MISSING_DOCUMENT_PROPERTY: `Missing searchable property "%s".`,
+ INVALID_DOCUMENT_PROPERTY: `Invalid document property "%s": expected "%s", got "%s"`,
+ UNKNOWN_INDEX: `Invalid property name "%s". Expected a wildcard string ("*") or array containing one of the following properties: %s`,
+ INVALID_BOOST_VALUE: `Boost value must be a number greater than, or less than 0.`,
+ INVALID_FILTER_OPERATION: `You can only use one operation per filter, you requested %d.`,
+};
+
+export type ErrorCode = keyof typeof errors;
+
+export interface LyraError extends Error {
+ code: string;
+}
+
+export function createError(code: ErrorCode, ...args: Array): LyraError {
+ const error = new Error(sprintf(errors[code] ?? `Unsupported Lyra Error code: ${code}`, ...args)) as LyraError;
+ error.code = code;
+ if ("captureStackTrace" in Error.prototype) {
+ Error.captureStackTrace(error);
+ }
+
+ return error;
}
-
-export function NON_SUPPORTED_HOOKS(invalidHooks: string[]): string {
- return `The following hooks aren't supported. Hooks: ${invalidHooks}`;
-}
-
-export function TYPE_ERROR_ID_MUST_BE_STRING(type: string): string {
- return `"id" must be of type "string". Got "${type}" instead.`;
-}
-
-export function ID_ALREADY_EXISTS(id: string): string {
- return `Document with ID "${id}" already exists.`;
-}
-
-export function LANGUAGE_NOT_SUPPORTED(lang: string): string {
- return `Language "${lang}" is not supported.\nSupported languages are:\n - ${SUPPORTED_LANGUAGES.join("\n - ")}`;
-}
-
-export function CUSTOM_STOP_WORDS_ARRAY_MUST_BE_STRING_ARRAY(): string {
- return `Custom stop words array must only contain strings.`;
-}
-
-export function CUSTOM_STOP_WORDS_MUST_BE_FUNCTION_OR_ARRAY(): string {
- return `Custom stop words must be a function or an array of strings.`;
-}
-
-export function INVALID_STEMMER_FUNCTION_TYPE(): string {
- return `tokenizer.stemmingFn property must be a function.`;
-}
-
-export function INVALID_TOKENIZER_FUNCTION(): string {
- return `tokenizer.tokenizerFn must be a function.`;
-}
-
-export function INVALID_BOOST_VALUE(): string {
- return `Boost value must be a number greater than, or less than 0.`;
-}
-
-export function INVALID_FILTER_OPERATION(found: string[]): string {
- return `You can only use one operation per filter. Found ${found.length}: ${found.join(", ")}`;
-}
\ No newline at end of file
diff --git a/src/facets.ts b/src/facets.ts
deleted file mode 100644
index bfc58db88..000000000
--- a/src/facets.ts
+++ /dev/null
@@ -1,117 +0,0 @@
-import type { FacetSorting, FacetsSearch, PropertiesSchema, ResolveSchema, TokenScore } from "./types/index.js";
-import { getNested } from './utils.js';
-
-export type FacetReturningValue = {
- [key: string]: {
- count: number;
- values: {
- [key: string]: number;
- }
- }
-}
-
-export function getFacets(schema: PropertiesSchema, docs: Record | undefined>, results: TokenScore[], facetsConfig: FacetsSearch): FacetReturningValue {
- const facets: FacetReturningValue = {};
- const allIDs = results.map(([id]) => id);
- const allDocs = allIDs.map((id) => docs[id]);
- const facetKeys = Object.keys(facetsConfig);
-
- for (const facet of facetKeys) {
- const facetType = getFacetType(schema, facet);
- let values = {};
-
- // Hack to guarantee the same order of ranges as specified by the user
- if (facetType === "number") {
- const { ranges } = (facetsConfig as any)[facet];
- const tmp = [];
- for (const range of ranges) {
- tmp.push([`${range.from}-${range.to}`, 0]);
- }
- values = Object.fromEntries(tmp as any);
- }
-
- facets[facet] = {
- count: 0,
- values,
- };
- }
-
- const allDocsLength = allDocs.length;
- for (let i = 0; i < allDocsLength; i++) {
- const doc = allDocs[i];
-
- for (const facet of facetKeys) {
- const facetValue = facet.includes('.')
- ? getNested(doc!, facet)!
- : doc![facet] as number | boolean;
-
- // String based facets
- if (typeof facetValue === "string") {
- if (facets[facet].values[facetValue] === undefined) {
- facets[facet].values[facetValue] = 1;
- } else {
- facets[facet].values[facetValue]++;
- }
-
- // Boolean facets
- } else if (typeof facetValue === "boolean") {
- if (facets[facet].values[facetValue.toString()] === undefined) {
- facets[facet].values[facetValue.toString()] = 1;
- } else {
- facets[facet].values[facetValue.toString()]++;
- }
- }
-
- // Range facets based on numbers
- else if (typeof facetValue === "number") {
- for (const range of (facetsConfig as any)[facet].ranges) {
- if (facetValue >= range.from && facetValue <= range.to) {
- if (facets[facet].values[`${range.from}-${range.to}`] === undefined) {
- facets[facet].values[`${range.from}-${range.to}`] = 1;
- } else {
- facets[facet].values[`${range.from}-${range.to}`]++;
- }
- }
- }
- }
- }
- }
-
- for (const facet of facetKeys) {
- const facetType = getFacetType(schema, facet);
-
- // Count the number of values for each facet
- facets[facet].count = Object.keys(facets[facet].values).length;
-
- // Sort only string-based facets
- if (facetType === "string") {
- facets[facet].values = Object.fromEntries(
- Object.entries(facets[facet].values)
- .sort((a, b) => sortingPredicate((facetsConfig as any)[facet].sort, a, b))
- .slice((facetsConfig as any)[facet].offset ?? 0, (facetsConfig as any)[facet].limit ?? 10),
- )
- }
- }
-
- return facets;
-}
-
-const facetTypeCache = new Map();
-
-function getFacetType(schema: PropertiesSchema, facet: string) {
- if (facetTypeCache.has(facet)) {
- return facetTypeCache.get(facet)!;
- }
-
- const facetType = getNested(schema, facet)!;
- facetTypeCache.set(facet, facetType);
- return facetType;
-}
-
-function sortingPredicate(order: FacetSorting = "desc", a: [string, number], b: [string, number]) {
- if (order.toLowerCase() === "asc") {
- return a[1] - b[1];
- } else {
- return b[1] - a[1];
- }
-}
\ No newline at end of file
diff --git a/src/filters.ts b/src/filters.ts
deleted file mode 100644
index 8edf58dfe..000000000
--- a/src/filters.ts
+++ /dev/null
@@ -1,103 +0,0 @@
-import type { WhereFilter, FilterOperation, PropertiesSchema, Lyra, BooleanIndex } from "./types/index.js";
-import type { AVLNode } from "./trees/avl/node.js";
-import { greaterThan, lessThan, rangeSearch, find } from "./trees/avl/index.js";
-import { intersect } from './utils.js'
-import * as ERRORS from "./errors.js";
-
-export function getWhereFiltersIDs(filters: WhereFilter, lyra: Lyra): string[] {
- const filterKeys = Object.keys(filters);
-
- const filtersMap: Record = filterKeys.reduce((acc, key) => ({
- [key]: [],
- ...acc,
- }), {});
-
- for (const param of filterKeys) {
- const operation = filters[param as keyof WhereFilter]
- const operationKeys = Object.keys(operation as unknown as FilterOperation[])
-
- if (operationKeys.length > 1) {
- throw new Error(ERRORS.INVALID_FILTER_OPERATION(operationKeys))
- }
-
- if (typeof operation === 'boolean') {
- const idx = lyra.index[param] as BooleanIndex;
- // eslint-disable-next-line @typescript-eslint/ban-ts-comment
- // @ts-ignore - this is a bug in the typescript compiler
- const filteredIDs = idx[operation.toString() as keyof BooleanIndex];
- filtersMap[param].push(...filteredIDs);
- }
-
- const operationOpt = operationKeys[0] as FilterOperation
- const operationValue = operation[operationOpt as keyof typeof operation];
-
- const AVLNode = lyra.index[param] as AVLNode;
-
- switch (operationOpt) {
- case "gt": {
- // eslint-disable-next-line @typescript-eslint/ban-ts-comment
- // @ts-ignore - this is a bug in the typescript compiler
- const filteredIDs = greaterThan(AVLNode, operationValue, false);
- filtersMap[param].push(...filteredIDs);
- break;
- }
- case "gte": {
- // eslint-disable-next-line @typescript-eslint/ban-ts-comment
- // @ts-ignore - this is a bug in the typescript compiler
- const filteredIDs = greaterThan(AVLNode, operationValue, true);
- filtersMap[param].push(...filteredIDs);
- break;
- }
- case "lt": {
- // eslint-disable-next-line @typescript-eslint/ban-ts-comment
- // @ts-ignore - this is a bug in the typescript compiler
- const filteredIDs = lessThan(AVLNode, operationValue, false);
- filtersMap[param].push(...filteredIDs);
- break;
- }
- case "lte": {
- // eslint-disable-next-line @typescript-eslint/ban-ts-comment
- // @ts-ignore - this is a bug in the typescript compiler
- const filteredIDs = lessThan(AVLNode, operationValue, true);
- filtersMap[param].push(...filteredIDs);
- break;
- }
- case "eq": {
- // eslint-disable-next-line @typescript-eslint/ban-ts-comment
- // @ts-ignore - this is a bug in the typescript compiler
- const filteredIDs = find(AVLNode, operationValue) ?? [];
- filtersMap[param].push(...filteredIDs);
- break;
- }
- case "between": {
- // eslint-disable-next-line @typescript-eslint/ban-ts-comment
- // @ts-ignore - this is a bug in the typescript compiler
- const filteredIDs = rangeSearch(AVLNode, operationValue[0], operationValue[1]);
- filtersMap[param].push(...filteredIDs);
- }
- }
- }
-
- // AND operation: calculate the intersection between all the IDs in filterMap
- const result = intersect(Object.values(filtersMap)) as unknown as string[];
-
- return result;
-}
-
-export function intersectFilteredIDs(filtered: string[], lookedUp: [string, number][]): [string, number][] {
- const map = new Map();
- const result: [string, number][] = [];
-
- for (const id of filtered) {
- map.set(id, true);
- }
-
- for (const [id, score] of lookedUp) {
- if (map.has(id)) {
- result.push([id, score]);
- map.delete(id);
- }
- }
-
- return result;
-}
\ No newline at end of file
diff --git a/src/index.ts b/src/index.ts
index 86463a620..ebc431223 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,13 +1,11 @@
export { create } from "./methods/create.js";
-export { insert, insertBatch, insertWithHooks } from "./methods/insert.js";
-export { load } from "./methods/load.js";
-export { remove } from "./methods/remove.js";
-export { save } from "./methods/save.js";
+export { insert, insertMultiple } from "./methods/insert.js";
+export { load, save } from "./methods/serialization.js";
+export { remove, removeMultiple } from "./methods/remove.js";
+export { update, updateMultiple } from "./methods/update.js";
export { search } from "./methods/search.js";
export { getByID, count } from "./methods/docs.js";
-export * from "./types/index.js";
+export * from "./types.js";
export type { Language } from "./tokenizer/languages.js";
-export type { InsertConfig, InsertBatchConfig } from "./methods/insert.js";
-export type { RetrievedDoc, SearchParams, SearchResult } from "./methods/search.js";
-export type { Stemmer, TokenizerConfig, Tokenizer } from "./tokenizer/index.js";
+export type { Stemmer, TokenizerConfig } from "./tokenizer/index.js";
diff --git a/src/insertion-checker.ts b/src/insertion-checker.ts
deleted file mode 100644
index f92db569b..000000000
--- a/src/insertion-checker.ts
+++ /dev/null
@@ -1,31 +0,0 @@
-const kInsertions = Symbol("lyra.insertions");
-
-// Web platforms don't have process. React-Native doesn't have process.emitWarning.
-const warn =
- globalThis.process?.emitWarning ??
- function emitWarning(message: string, options: { code: string }) {
- console.warn(`[WARNING] [${options.code}] ${message}`);
- };
-
-export function trackInsertion(_lyra: unknown) {
- const lyra = _lyra as object & { [kInsertions]?: number };
-
- if (typeof lyra[kInsertions] !== "number") {
- queueMicrotask(() => {
- lyra[kInsertions] = undefined;
- });
-
- lyra[kInsertions] = 0;
- }
-
- if (lyra[kInsertions]! > 1000) {
- warn(
- "Lyra's insert operation is synchronous. Please avoid inserting a large number of document in a single operation in order not to block the main thread.",
- { code: "LYRA0001" },
- );
-
- lyra[kInsertions] = -1;
- } else if (lyra[kInsertions] >= 0) {
- lyra[kInsertions]++;
- }
-}
diff --git a/src/internals.ts b/src/internals.ts
index 4de224af4..555e06c1a 100644
--- a/src/internals.ts
+++ b/src/internals.ts
@@ -1,3 +1,3 @@
-export { boundedLevenshtein } from "./levenshtein.js";
-export { formatNanoseconds, getNanosecondsTime } from "./utils.js";
-export { tokenize, normalizationCache, defaultTokenizerConfig } from "./tokenizer/index.js";
+export { boundedLevenshtein } from "./components/levenshtein.js";
+export { sprintf, formatBytes, formatNanoseconds, getNanosecondsTime, uniqueId } from "./utils.js";
+export { normalizationCache, createTokenizer } from "./tokenizer/index.js";
diff --git a/src/methods/common.ts b/src/methods/common.ts
deleted file mode 100644
index 848630566..000000000
--- a/src/methods/common.ts
+++ /dev/null
@@ -1,56 +0,0 @@
-import * as ERRORS from "../errors.js";
-import type { Lyra, PropertiesSchema, ResolveSchema } from "../types/index.js";
-import type { SearchParams } from "./search.js";
-
-export function assertDocSchema(doc: ResolveSchema, lyraSchema: PropertiesSchema) {
- if (!recursiveCheckDocSchema(doc, lyraSchema)) {
- throw new Error(ERRORS.INVALID_DOC_SCHEMA(lyraSchema, doc));
- }
-}
-
-export function recursiveCheckDocSchema(
- newDoc: ResolveSchema,
- schema: PropertiesSchema,
-): boolean {
- for (const key in newDoc) {
- if (!(key in schema)) {
- continue;
- }
-
- const propType = typeof newDoc[key];
-
- if (propType === "object") {
- recursiveCheckDocSchema(newDoc[key] as ResolveSchema, schema);
- } else if (typeof newDoc[key] !== schema[key]) {
- return false;
- }
- }
-
- return true;
-}
-
-export function getIndices(
- lyra: Lyra,
- indices: SearchParams["properties"],
-): string[] {
- const knownIndices = Object.keys(lyra.index);
-
- if (!indices) {
- return knownIndices;
- }
-
- if (typeof indices === "string") {
- if (indices !== "*") {
- throw new Error(ERRORS.INVALID_PROPERTY(indices, knownIndices));
- }
- return knownIndices;
- }
-
- for (const index of indices as string[]) {
- if (!knownIndices.includes(index)) {
- throw new Error(ERRORS.INVALID_PROPERTY(index, knownIndices));
- }
- }
-
- return indices as string[];
-}
diff --git a/src/methods/create.ts b/src/methods/create.ts
index bd42874e3..86644f518 100644
--- a/src/methods/create.ts
+++ b/src/methods/create.ts
@@ -1,86 +1,147 @@
-import type { Configuration, Lyra, PropertiesSchema } from "../types/index.js";
-import { defaultTokenizerConfig, Language } from "../tokenizer/index.js";
-import * as ERRORS from "../errors.js";
-import { create as createNode } from "../trees/radix/node.js";
-import { create as createAVLNode } from "../trees/avl/index.js";
-import { validateHooks } from "./hooks.js";
-import { intersectTokenScores } from "../algorithms.js";
-
-/**
- * Creates a new database.
- * @param properties Options to initialize the database with.
- * @example
- * // Create a database that stores documents containing 'author' and 'quote' fields.
- * const db = await create({
- * schema: {
- * author: 'string',
- * quote: 'string'
- * },
- * hooks: {
- * afterInsert: [afterInsertHook],
- * }
- * });
- */
-export async function create(properties: Configuration): Promise> {
- const defaultLanguage = (properties?.defaultLanguage?.toLowerCase() as Language) ?? "english";
-
- const tokenizer = defaultTokenizerConfig(defaultLanguage, properties.components?.tokenizer ?? {});
- tokenizer.assertSupportedLanguage(defaultLanguage);
-
- validateHooks(properties.hooks);
-
- const instance: Lyra = {
- defaultLanguage,
- schema: properties.schema,
- docs: {},
- docsCount: 0,
- index: {},
- hooks: properties.hooks || {},
- edge: properties.edge ?? false,
- frequencies: {},
- tokenOccurrencies: {},
- avgFieldLength: {},
- fieldLengths: {},
- components: {
- elapsed: properties.components?.elapsed ?? {},
- tokenizer,
- algorithms: {
- intersectTokenScores: properties.components?.algorithms?.intersectTokenScores ?? intersectTokenScores,
- },
- },
- };
+import { getDefaultComponents } from "../components/defaults.js";
+import { createDocumentsStore } from "../components/documents-store.js";
+import { COMPLEX_COMPONENTS, SIMPLE_COMPONENTS, SIMPLE_OR_ARRAY_COMPONENTS } from "../components/hooks.js";
+import { createIndex } from "../components/index.js";
+import { createError } from "../errors.js";
+import { createTokenizer } from "../tokenizer/index.js";
+import {
+ ArrayCallbackComponents,
+ Components,
+ IDocumentsStore,
+ IIndex,
+ Lyra,
+ OpaqueDocumentStore,
+ OpaqueIndex,
+ Schema,
+ SimpleComponents,
+ SimpleOrArrayCallbackComponents,
+} from "../types.js";
- buildIndex(instance, properties.schema);
- return instance;
+interface CreateArguments {
+ schema: Schema;
+ language?: string;
+ components?: Components;
}
-function buildIndex(lyra: Lyra, schema: S, prefix = "") {
- for (const prop of Object.keys(schema)) {
- const propType = typeof prop;
- const isNested = typeof schema[prop] === "object";
-
- if (propType !== "string") throw new Error(ERRORS.INVALID_SCHEMA_TYPE(propType));
+function validateComponents(
+ components: Components,
+) {
+ const defaultComponents = getDefaultComponents();
- const propName = `${prefix}${prop}`;
+ for (const rawKey of SIMPLE_COMPONENTS) {
+ const key = rawKey as keyof SimpleComponents;
- if (isNested) {
- buildIndex(lyra, schema[prop] as S, `${propName}.`);
- } else {
- if (schema[prop] === "string") {
- lyra.index[propName] = createNode();
- lyra.avgFieldLength[propName] = 0;
- continue;
- }
-
- if (schema[prop] === "number") {
- lyra.index[propName] = createAVLNode(0, []);
- continue;
+ if (components[key]) {
+ if (typeof components[key] !== "function") {
+ throw createError("COMPONENT_MUST_BE_FUNCTION", key);
}
+ } else {
+ // @ts-expect-error TSC is unable to resolve this
+ components[key] = defaultComponents[key];
+ }
+ }
+
+ for (const rawKey of SIMPLE_OR_ARRAY_COMPONENTS) {
+ const key = rawKey as keyof ArrayCallbackComponents;
- if (schema[prop] === "boolean") {
- lyra.index[propName] = { 'true': [], 'false': [] };
- continue;
+ if (!components[key]) {
+ components[key] = [];
+ } else if (!Array.isArray(components[key])) {
+ // @ts-expect-error TSC is unable to resolve this
+ components[key] = [components[key]];
+ }
+
+ for (const fn of components[key] as unknown as SimpleOrArrayCallbackComponents[]) {
+ if (typeof fn !== "function") {
+ throw createError("COMPONENT_MUST_BE_FUNCTION_OR_ARRAY_FUNCTIONS", key);
}
}
}
+
+ for (const rawKey of Object.keys(components)) {
+ if (
+ !COMPLEX_COMPONENTS.includes(rawKey) &&
+ !SIMPLE_COMPONENTS.includes(rawKey) &&
+ !SIMPLE_OR_ARRAY_COMPONENTS.includes(rawKey)
+ ) {
+ throw createError("UNSUPPORTED_COMPONENT", rawKey);
+ }
+ }
+}
+
+export async function create({
+ schema,
+ language,
+ components,
+}: CreateArguments): Promise> {
+ if (!components) {
+ components = {};
+ }
+
+ let tokenizer = components.tokenizer;
+ let index = components.index;
+ let documentsStore = components.documentsStore;
+
+ if (!tokenizer) {
+ // Use the default tokenizer
+ tokenizer = await createTokenizer(language ?? "english");
+ } else if (language) {
+ // Accept language only if a tokenizer is not provided
+ throw createError("NO_LANGUAGE_WITH_CUSTOM_TOKENIZER");
+ }
+
+ if (!index) {
+ index = createIndex() as unknown as IIndex;
+ }
+
+ if (!documentsStore) {
+ documentsStore = createDocumentsStore() as unknown as IDocumentsStore;
+ }
+
+ // Validate all other components
+ validateComponents(components);
+
+ // Assign only recognized components and hooks
+ const {
+ getDocumentProperties,
+ getDocumentIndexId,
+ validateSchema,
+ beforeInsert,
+ afterInsert,
+ beforeRemove,
+ afterRemove,
+ beforeMultipleInsert,
+ afterMultipleInsert,
+ beforeMultipleRemove,
+ afterMultipleRemove,
+ formatElapsedTime,
+ } = components;
+
+ const lyra = {
+ data: {},
+ caches: {},
+ schema,
+ tokenizer,
+ index,
+ documentsStore,
+ getDocumentProperties,
+ getDocumentIndexId,
+ validateSchema,
+ beforeInsert,
+ afterInsert,
+ beforeRemove,
+ afterRemove,
+ beforeMultipleInsert,
+ afterMultipleInsert,
+ beforeMultipleRemove,
+ afterMultipleRemove,
+ formatElapsedTime,
+ } as Lyra;
+
+ lyra.data = {
+ index: await lyra.index.create(lyra, schema),
+ docs: await lyra.documentsStore.create(lyra),
+ };
+
+ return lyra;
}
diff --git a/src/methods/docs.ts b/src/methods/docs.ts
index 8234b62d5..239118455 100644
--- a/src/methods/docs.ts
+++ b/src/methods/docs.ts
@@ -1,33 +1,14 @@
-import type { PropertiesSchema, Lyra, ResolveSchema } from "../types/index.js";
+import { Document, Lyra, OpaqueDocumentStore, OpaqueIndex, Schema } from "../types.js";
-/**
- * Gets a document from a Lyra database by its ID.
- * @template S - The schema type for the Lyra database.
- * @param {Lyra} db - The Lyra database to get the document from.
- * @param {string} id - The ID of the document to get.
- * @returns {Promise | undefined>} - The document with the given ID, or undefined if it doesn't exist.
- * @example
- *
- * import { getByID } from '@lyrasearch/lyra';
- *
- * const doc = await getByID(db, 'doc1'); // { id: 'doc1', title: 'Hello World' }
- * const doc = await getByID(db, 'doc4'); // undefined
- */
-export async function getByID(db: Lyra, id: string): Promise | undefined> {
- return db.docs[id];
+export function getByID(
+ db: Lyra,
+ id: string,
+): Promise {
+ return db.documentsStore.get(db.data.docs, id) as Promise;
}
-/**
- * Counts the number of documents in a Lyra database.
- * @template S - The schema type for the Lyra database.
- * @param {Lyra} db - The Lyra database to count documents in.
- * @returns {Promise} - The number of documents in the Lyra database.
- * @example
- *
- * import { count } from '@lyrasearch/lyra';
- *
- * const numDocs = await count(db); // 3
-*/
-export async function count(db: Lyra): Promise {
- return Object.keys(db.docs).length;
+export function count(
+ db: Lyra,
+): Promise {
+ return db.documentsStore.count(db.data.docs) as Promise;
}
diff --git a/src/methods/hooks.ts b/src/methods/hooks.ts
deleted file mode 100644
index a05f38931..000000000
--- a/src/methods/hooks.ts
+++ /dev/null
@@ -1,37 +0,0 @@
-import * as ERRORS from "../errors.js";
-import type { Lyra, PropertiesSchema } from "../types/index.js";
-
-export interface AfterInsertHook {
- (this: Lyra, id: string): Promise | void;
-}
-
-export type Hooks = {
- afterInsert?: AfterInsertHook | AfterInsertHook[];
-};
-
-const SUPPORTED_HOOKS = ["afterInsert"];
-
-export function validateHooks(hooks?: Hooks): void | never {
- if (hooks) {
- if (typeof hooks !== "object") {
- throw new Error(ERRORS.INVALID_HOOKS_OBJECT());
- }
-
- const invalidHooks = Object.keys(hooks).filter(hook => !SUPPORTED_HOOKS.includes(hook));
- if (invalidHooks.length) {
- throw new Error(ERRORS.NON_SUPPORTED_HOOKS(invalidHooks));
- }
- }
-}
-
-export async function hookRunner(
- this: Lyra,
- // eslint-disable-next-line @typescript-eslint/ban-types
- funcs: Function | Function[],
- ...args: unknown[]
-): Promise {
- const hooks = Array.isArray(funcs) ? funcs : [funcs];
- for (let i = 0; i < hooks.length; i++) {
- await hooks[i].apply(this, args);
- }
-}
diff --git a/src/methods/insert.ts b/src/methods/insert.ts
index 28e0c484f..cdccfdb00 100644
--- a/src/methods/insert.ts
+++ b/src/methods/insert.ts
@@ -1,256 +1,116 @@
-import type { BooleanIndex, Lyra, PropertiesSchema, ResolveSchema } from "../types/index.js";
-import type { Language, TokenizerConfigExec } from "../tokenizer/index.js";
-import type { AVLNode } from "../../src/trees/avl/node.js";
-import type { RadixNode } from "../trees/radix/node.js";
-import { trackInsertion } from "../insertion-checker.js";
-import { insert as radixInsert } from "../trees/radix/index.js";
-import { insert as AVLInsert } from "../trees/avl/index.js";
-import { uniqueId } from "../utils.js";
-import { assertDocSchema } from "./common.js";
-import { hookRunner } from "./hooks.js";
-import * as ERRORS from "../errors.js";
+import { runMultipleHook, runSingleHook } from "../components/hooks.js";
+import { createError } from "../errors.js";
+import { trackInsertion } from "../components/sync-blocking-checker.js";
+import { Document, Schema, OpaqueIndex, OpaqueDocumentStore, Lyra } from "../types.js";
+
+export async function insert(
+ lyra: Lyra,
+ doc: Document,
+ language?: string,
+ skipHooks?: boolean,
+): Promise {
+ await lyra.validateSchema(doc, lyra.schema);
+ const { index, docs } = lyra.data;
-export type InsertConfig = {
- language?: Language;
- id?: (doc: ResolveSchema) => string | Promise;
-};
+ const id = await lyra.getDocumentIndexId(doc);
-export type InsertBatchConfig = InsertConfig & {
- batchSize?: number;
-};
+ if (typeof id !== "string") {
+ throw createError("DOCUMENT_ID_MUST_BE_STRING", typeof id);
+ }
-/**
- * Inserts a document into a database.
- * @param lyra The database to insert document into.
- * @param doc The document to insert.
- * @param config Optional parameter for overriding default configuration.
- * @returns An object containing id of the inserted document.
- * @example
- * const { id } = await insert(db, {
- * quote: 'You miss 100% of the shots you don\'t take',
- * author: 'Wayne Gretzky - Michael Scott'
- * });
- */
-export async function insert(
- lyra: Lyra,
- doc: ResolveSchema,
- config?: InsertConfig,
-): Promise<{ id: string }> {
- config = { language: lyra.defaultLanguage, ...config };
+ if (!(await lyra.documentsStore.store(docs, id, doc))) {
+ throw createError("DOCUMENT_ALREADY_EXISTS", id);
+ }
- const id = await getDocumentID(doc, config);
+ const docsCount = await lyra.documentsStore.count(docs);
- // If the ID already exists, we throw an error.
- if (lyra.docs[id]) throw new Error(ERRORS.ID_ALREADY_EXISTS(id));
+ if (!skipHooks) {
+ await runSingleHook(lyra.beforeInsert, lyra, id, doc);
+ }
- lyra.components?.tokenizer?.assertSupportedLanguage?.(config.language!);
+ const indexableProperties = await lyra.index.getSearchableProperties(index);
+ const indexablePropertiesWithTypes = await lyra.index.getSearchablePropertiesWithTypes(index);
+ const values = await lyra.getDocumentProperties(doc, indexableProperties);
- assertDocSchema(doc, lyra.schema);
+ for (const [key, value] of Object.entries(values)) {
+ if (typeof value === "undefined") {
+ continue;
+ }
- lyra.docs[id] = doc;
- lyra.docsCount++;
- recursiveradixInsertion(lyra, doc, id, config, undefined, lyra.components?.tokenizer as TokenizerConfigExec);
- trackInsertion(lyra);
+ const actualType = typeof value;
+ const expectedType = indexablePropertiesWithTypes[key];
- return { id };
-}
+ if (actualType !== expectedType) {
+ throw createError("INVALID_DOCUMENT_PROPERTY", key, expectedType, actualType);
+ }
+ }
-/**
- * Inserts a document into a database.
- * @param lyra The database to insert document into.
- * @param doc The document to insert.
- * @param config Optional parameter for overriding default configuration.
- * @returns A Promise object containing id of the inserted document.
- * @example
- * const { id } = await insert(db, {
- * quote: 'You miss 100% of the shots you don\'t take',
- * author: 'Wayne Gretzky - Michael Scott'
- * });
- */
-export async function insertWithHooks(
- lyra: Lyra,
- doc: ResolveSchema,
- config?: InsertConfig,
-): Promise<{ id: string }> {
- config = { language: lyra.defaultLanguage, ...config };
- const id = await getDocumentID(doc, config);
+ for (const prop of indexableProperties) {
+ const value = values[prop];
- lyra.components?.tokenizer?.assertSupportedLanguage?.(config.language!);
+ if (typeof value === "undefined") {
+ continue;
+ }
- assertDocSchema(doc, lyra.schema);
+ await lyra.index.beforeInsert?.(lyra.data.index, prop, id, value, language, lyra.tokenizer, docsCount);
+ await lyra.index.insert(lyra.data.index, prop, id, value, language, lyra.tokenizer, docsCount);
+ await lyra.index.afterInsert?.(lyra.data.index, prop, id, value, language, lyra.tokenizer, docsCount);
+ }
- lyra.docs[id] = doc;
- lyra.docsCount++;
- recursiveradixInsertion(lyra, doc, id, config, undefined, lyra.components?.tokenizer as TokenizerConfigExec);
- trackInsertion(lyra);
- if (lyra.hooks.afterInsert) {
- await hookRunner.call(lyra, lyra.hooks.afterInsert, id);
+ if (!skipHooks) {
+ await runSingleHook(lyra.afterInsert, lyra, id, doc);
}
- return { id };
+ trackInsertion(lyra);
+
+ return id;
}
-/**
- * Inserts a large array of documents into a database without blocking the event loop.
- * @param lyra The database to insert document into.
- * @param docs Array of documents to insert.
- * @param config Optional parameter for overriding default configuration.
- * @returns Promise.
- * @example
- * insertBatch(db, [
- * {
- * quote: 'You miss 100% of the shots you don\'t take',
- * author: 'Wayne Gretzky - Michael Scott'
- * },
- * {
- * quote: 'What I cannot createm I do not understand',
- * author: 'Richard Feynman'
- * }
- * ]);
- */
-export async function insertBatch(
- lyra: Lyra,
- docs: ResolveSchema[],
- config?: InsertBatchConfig,
-): Promise {
- const batchSize = config?.batchSize ?? 1000;
+export async function insertMultiple(
+ lyra: Lyra,
+ docs: Document[],
+ batchSize?: number,
+ language?: string,
+ skipHooks?: boolean,
+): Promise {
+ if (!batchSize) {
+ batchSize = 1000;
+ }
+
+ if (!skipHooks) {
+ await runMultipleHook(lyra.beforeMultipleInsert, lyra, docs);
+ }
- return new Promise((resolve, reject) => {
+ const ids: string[] = [];
+
+ await new Promise((resolve, reject) => {
let i = 0;
- async function _insertBatch() {
- const batch = docs.slice(i * batchSize, (i + 1) * batchSize);
+ async function _insertMultiple() {
+ const batch = docs.slice(i * batchSize!, (i + 1) * batchSize!);
i++;
if (!batch.length) {
return resolve();
}
- for (const line of batch) {
+ for (const doc of batch) {
try {
- await insertWithHooks(lyra, line, config);
+ const id = await insert(lyra, doc, language, skipHooks);
+ ids.push(id);
} catch (err) {
reject(err);
}
}
- setTimeout(_insertBatch, 0);
+ setTimeout(_insertMultiple, 0);
}
- setTimeout(_insertBatch, 0);
+ setTimeout(_insertMultiple, 0);
});
-}
-
-function recursiveradixInsertion(
- lyra: Lyra,
- doc: ResolveSchema,
- id: string,
- config: InsertConfig,
- prefix = "",
- tokenizerConfig: TokenizerConfigExec,
- schema: PropertiesSchema = lyra.schema,
-) {
- config = { language: lyra.defaultLanguage, ...config };
- const { index, frequencies, tokenOccurrencies } = lyra;
-
- for (const key of Object.keys(doc)) {
- const isNested = typeof doc[key] === "object";
- const isSchemaNested = typeof schema[key] == "object";
- const propName = `${prefix}${key}`;
- if (isNested && key in schema && isSchemaNested) {
- recursiveradixInsertion(
- lyra,
- doc[key] as ResolveSchema,
- id,
- config,
- propName + ".",
- tokenizerConfig,
- schema[key] as PropertiesSchema,
- );
- }
-
- if (typeof doc[key] === "number" && key in schema && !isSchemaNested) {
- AVLInsert(lyra.index[propName] as AVLNode, doc[key] as number, [id]);
- }
-
- if (typeof doc[key] === "boolean" && key in schema && !isSchemaNested) {
- const docKey = doc[key].toString() as "true" | "false";
- (lyra.index[propName] as BooleanIndex)[docKey].push(id);
- }
-
- if (typeof doc[key] === "string" && key in schema && !isSchemaNested) {
- // Use propName here because if doc is a nested object
- // We will get the wrong index
- const requestedTrie = index[propName];
- const tokens = tokenizerConfig.tokenizerFn(doc[key] as string, config.language!, false, tokenizerConfig);
- if (!(propName in frequencies)) {
- frequencies[propName] = {};
- }
-
- if (!(propName in tokenOccurrencies)) {
- tokenOccurrencies[propName] = {};
- }
-
- if (!(id in frequencies[propName])) {
- frequencies[propName][id] = {};
- }
-
- if (!(propName in lyra.fieldLengths)) {
- lyra.fieldLengths[propName] = {};
- }
-
- lyra.fieldLengths[propName][id] = tokens.length;
- lyra.avgFieldLength[propName] = ((lyra.avgFieldLength[propName] ?? 0) * (lyra.docsCount - 1) + tokens.length) / lyra.docsCount;
-
- for (const token of tokens) {
- let tokenFrequency = 0;
-
- for (const t of tokens) {
- if (t === token) {
- tokenFrequency++;
- }
- }
-
- const tf = tokenFrequency / tokens.length;
-
- frequencies[propName][id][token] = tf;
-
- if (!(token in tokenOccurrencies[propName])) {
- tokenOccurrencies[propName][token] = 0;
- }
-
- // increase a token counter that may not yet exist
- tokenOccurrencies[propName][token] = (tokenOccurrencies[propName][token] ?? 0) + 1;
-
- radixInsert(requestedTrie as RadixNode, token, id);
- }
- }
+ if (!skipHooks) {
+ await runMultipleHook(lyra.afterMultipleInsert, lyra, docs);
}
-}
-
-async function getDocumentID(
- doc: ResolveSchema,
- config: InsertConfig,
-): Promise {
- let id: string;
- // If the user passes a custom ID function, we use it to generate the ID.
- // This has the maximum priority.
- if (config?.id) {
- id = await config.id(doc);
-
- // If the user passes an ID in the document, we use it.
- } else if (doc.id && typeof doc.id === "string") {
- id = doc.id;
-
- // If the user passes an ID in the document, but it's not a string, we throw a type error.
- } else if (doc.id && typeof doc.id !== "string") {
- throw new TypeError(ERRORS.TYPE_ERROR_ID_MUST_BE_STRING(typeof doc.id));
-
- // If the user doesn't pass an ID, we generate one.
- } else {
- id = uniqueId();
- }
-
- return id;
+ return ids;
}
diff --git a/src/methods/load.ts b/src/methods/load.ts
deleted file mode 100644
index 166c1f6c6..000000000
--- a/src/methods/load.ts
+++ /dev/null
@@ -1,21 +0,0 @@
-import * as ERRORS from "../errors.js";
-import type { Data, Lyra, PropertiesSchema } from "../types/index.js";
-
-export async function load(
- lyra: Lyra,
- { index, docs, schema, frequencies, tokenOccurrencies, defaultLanguage, fieldLengths, avgFieldLength }: Data,
-): Promise {
- if (!lyra.edge) {
- throw new Error(ERRORS.GETTER_SETTER_WORKS_ON_EDGE_ONLY("load"));
- }
-
- lyra.index = index;
- lyra.docs = docs;
- lyra.docsCount = Object.keys(docs).length;
- lyra.schema = schema;
- lyra.frequencies = frequencies;
- lyra.tokenOccurrencies = tokenOccurrencies;
- lyra.defaultLanguage = defaultLanguage;
- lyra.fieldLengths = fieldLengths;
- lyra.avgFieldLength = avgFieldLength;
-}
diff --git a/src/methods/remove.ts b/src/methods/remove.ts
index 299405229..6a8fe6b11 100644
--- a/src/methods/remove.ts
+++ b/src/methods/remove.ts
@@ -1,107 +1,94 @@
-import type { RadixNode } from "../trees/radix/node.js";
-import type { Lyra, PropertiesSchema, ResolveSchema, BooleanIndex } from "../types/index.js";
-import { defaultTokenizerConfig } from "../tokenizer/index.js";
-import { removeDocumentByWord } from "../trees/radix/index.js";
-import { flattenObject, getNested } from "../utils.js";
-import { getNodeByKey as getAVLNodeByKey } from "../trees/avl/index.js";
-import * as ERRORS from "../errors.js";
-import { AVLNode } from "../trees/avl/node.js";
-
-/**
- * Removes a document from a database.
- * @param lyra The database to remove the document from.
- * @param docID The id of the document to remove.
- * @example
- * const isDeleted = await remove(db, 'L1tpqQxc0c2djrSN2a6TJ');
- */
-export async function remove(lyra: Lyra, docID: string): Promise {
- if (!lyra.components?.tokenizer) {
- lyra.components = {
- ...(lyra.components ?? {}),
- tokenizer: defaultTokenizerConfig(lyra.defaultLanguage),
- };
+import { runMultipleHook, runSingleHook } from "../components/hooks.js";
+import { trackRemoval } from "../components/sync-blocking-checker.js";
+import { createError } from "../errors.js";
+import { Lyra, OpaqueDocumentStore, OpaqueIndex, Schema } from "../types.js";
+
+export async function remove(
+ lyra: Lyra,
+ id: string,
+ language?: string,
+ skipHooks?: boolean,
+): Promise {
+ let result = true;
+ const { index, docs } = lyra.data;
+
+ const doc = await lyra.documentsStore.get(docs, id);
+ if (!doc) {
+ throw createError("DOCUMENT_DOES_NOT_EXIST", id);
}
- if (!(docID in lyra.docs)) {
- throw new Error(ERRORS.DOC_ID_DOES_NOT_EXISTS(docID));
+ const docsCount = await lyra.documentsStore.count(docs);
+
+ if (!skipHooks) {
+ await runSingleHook(lyra.beforeRemove, lyra, id);
}
- const document = lyra.docs[docID] || ({} as Record>);
- const documentKeys = Object.keys(document || {});
-
- const documentKeysLength = documentKeys.length;
- for (let i = 0; i < documentKeysLength; i++) {
- const key = documentKeys[i];
-
- const propertyType = lyra.schema[key];
-
- if (propertyType === "string") {
- const idx = lyra.index[key];
- const tokens: string[] = lyra.components.tokenizer!.tokenizerFn!(
- document[key] as string,
- lyra.defaultLanguage,
- false,
- lyra.components.tokenizer!,
- )!;
-
- lyra.avgFieldLength[key] = (lyra.avgFieldLength[key] * lyra.docsCount - lyra.fieldLengths[key][docID]) / (lyra.docsCount - 1);
- delete lyra.fieldLengths[key][docID];
-
- const tokensLength = tokens.length;
- for (let k = 0; k < tokensLength; k++) {
- const token = tokens[k];
- delete lyra.frequencies[key][docID];
- lyra.tokenOccurrencies[key][token]--;
- if (token && !removeDocumentByWord(idx as RadixNode, token, docID)) {
- throw new Error(ERRORS.CANT_DELETE_DOCUMENT(docID, key, token));
- }
- }
+ const indexableProperties = await lyra.index.getSearchableProperties(index);
+ const values = await lyra.getDocumentProperties(doc, indexableProperties);
+
+ for (const prop of indexableProperties) {
+ const value = values[prop];
+ await lyra.index.beforeRemove?.(lyra.data.index, prop, id, value, language, lyra.tokenizer, docsCount);
+ if (!(await lyra.index.remove(lyra.data.index, prop, id, value, language, lyra.tokenizer, docsCount))) {
+ result = false;
}
+ await lyra.index.afterRemove?.(lyra.data.index, prop, id, value, language, lyra.tokenizer, docsCount);
}
- removeNumericValue(lyra, docID);
- removeBooleanValue(lyra, docID);
-
- lyra.docs[docID] = undefined;
- lyra.docsCount--;
+ if (!skipHooks) {
+ await runSingleHook(lyra.afterRemove, lyra, id);
+ }
- return true;
+ trackRemoval(lyra);
+ return result;
}
-function removeNumericValue(lyra: Lyra, docID: string) {
- const document = lyra.docs[docID] as Record>;
- const flatDocument = flattenObject(document);
- const documentNumericOnly = Object.keys(flatDocument).reduce((acc, key) => {
- if (getNested(lyra.schema, key) === "number") {
- acc[key] = (flatDocument as any)[key];
- }
- return acc;
- }, {} as Record);
-
- for (const [property, value] of Object.entries(documentNumericOnly)) {
- const idx = lyra.index[property] as AVLNode;
- const node = getAVLNodeByKey(idx, value);
+export async function removeMultiple(
+ lyra: Lyra,
+ ids: string[],
+ batchSize?: number,
+ language?: string,
+ skipHooks?: boolean,
+): Promise {
+ let result = true;
+
+ if (!batchSize) {
+ batchSize = 1000;
+ }
- if (node) {
- node.value = node.value.filter((id) => id !== docID);
- }
+ if (!skipHooks) {
+ await runMultipleHook(lyra.beforeMultipleRemove, lyra, ids);
}
-}
-
-function removeBooleanValue(lyra: Lyra, docID: string) {
- const document = lyra.docs[docID] as Record>;
- const flatDocument = flattenObject(document);
- const documentBooleanOnly = Object.keys(flatDocument).reduce((acc, key) => {
- if (getNested(lyra.schema, key) === "boolean") {
- acc[key] = (flatDocument as any)[key];
+
+ await new Promise((resolve, reject) => {
+ let i = 0;
+ async function _insertMultiple() {
+ const batch = ids.slice(i * batchSize!, (i + 1) * batchSize!);
+ i++;
+
+ if (!batch.length) {
+ return resolve();
+ }
+
+ for (const doc of batch) {
+ try {
+ if (!(await remove(lyra, doc, language, skipHooks))) {
+ result = false;
+ }
+ } catch (err) {
+ reject(err);
+ }
+ }
+
+ setTimeout(_insertMultiple, 0);
}
- return acc;
- }, {} as Record);
- for (const [property] of Object.entries(documentBooleanOnly)) {
- const idx = lyra.index[property] as BooleanIndex;
+ setTimeout(_insertMultiple, 0);
+ });
- idx.true.slice(idx.true.indexOf(docID), 1);
- idx.false.slice(idx.false.indexOf(docID), 1);
+ if (!skipHooks) {
+ await runMultipleHook(lyra.afterMultipleRemove, lyra, ids);
}
-}
\ No newline at end of file
+
+ return result;
+}
diff --git a/src/methods/save.ts b/src/methods/save.ts
deleted file mode 100644
index ceb5901e3..000000000
--- a/src/methods/save.ts
+++ /dev/null
@@ -1,14 +0,0 @@
-import type { Data, Lyra, PropertiesSchema } from "../types/index.js";
-
-export async function save(lyra: Lyra): Promise> {
- return {
- index: lyra.index,
- docs: lyra.docs,
- schema: lyra.schema,
- frequencies: lyra.frequencies,
- tokenOccurrencies: lyra.tokenOccurrencies,
- defaultLanguage: lyra.defaultLanguage,
- avgFieldLength: lyra.avgFieldLength,
- fieldLengths: lyra.fieldLengths,
- };
-}
diff --git a/src/methods/search.ts b/src/methods/search.ts
index d6e0b4195..d9da68242 100644
--- a/src/methods/search.ts
+++ b/src/methods/search.ts
@@ -1,203 +1,41 @@
-import type { RadixNode } from "../trees/radix/node.js";
-import type { Lyra, PropertiesSchema, ResolveSchema, SearchProperties, TokenMap, TokenScore, BM25Params, BM25OptionalParams, PropertiesBoost, FacetsSearch } from "../types/index.js";
-import type { WhereFilter } from "../types/filters.js";
-import { defaultTokenizerConfig, Language } from "../tokenizer/index.js";
-import { find as radixFind } from "../trees/radix/index.js";
-import { formatNanoseconds, getNanosecondsTime, sortTokenScorePredicate } from "../utils.js";
-import { getIndices } from "./common.js";
-import { prioritizeTokenScores, BM25 } from "../algorithms.js";
-import { FacetReturningValue, getFacets } from "../facets.js";
-import { getWhereFiltersIDs, intersectFilteredIDs } from "../filters.js";
-
-type IndexMap = Record;
-
-export type RetrievedDoc = {
- /**
- * The id of the document.
- */
- id: string;
- /**
- * The score of the document in the search.
- */
- score: number;
- /**
- * The document
- */
- document: ResolveSchema;
-};
-
-export type SearchParams = {
- /**
- * The word to search.
- */
- term: string;
- /**
- * The properties of the document to search in.
- */
- properties?: "*" | SearchProperties[];
- /**
- * The number of matched documents to return.
- */
- limit?: number;
- /**
- * The number of matched documents to skip.
- */
- offset?: number;
- /**
- * Whether to match the term exactly.
- */
- exact?: boolean;
- /**
- * The maximum [levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance)
- * between the term and the searchable property.
- */
- tolerance?: number;
- /**
- * The BM25 parameters to use.
- *
- * k: Term frequency saturation parameter.
- * The higher the value, the more important the term frequency becomes.
- * The default value is 1.2. It should be set to a value between 1.2 and 2.0.
- *
- * b: Document length saturation impact. The higher the value, the more
- * important the document length becomes. The default value is 0.75.
- *
- * d: Frequency normalization lower bound. Default value is 0.5.
- *
- * @see https://en.wikipedia.org/wiki/Okapi_BM25
- */
- relevance?: BM25OptionalParams;
- /**
- * The boost to apply to the properties.
- *
- * The boost is a number that is multiplied to the score of the property.
- * It can be used to give more importance to some properties.
- *
- * @example
- * // Give more importance to the 'title' property.
- * const result = await search(db, {
- * term: 'Michael',
- * properties: ['title', 'author'],
- * boost: {
- * title: 2
- * }
- * });
- *
- * // In that case, the score of the 'title' property will be multiplied by 2.
- */
- boost?: PropertiesBoost;
- /**
- * Facets configuration
- *
- * A facet is a feature that allows users to narrow down their search results by specific
- * attributes or characteristics, such as category, price, or location.
- * This can help users find more relevant and specific results for their search query.
- *
- * @example
- *
- * const results = await search(db, {
- * term: 'Personal Computer',
- * properties: ['title', 'description', 'category.primary', 'category.secondary'],
- * facets: {
- * 'category.primary': {
- * size: 10,
- * sort: 'ASC',
- * }
- * }
- * });
- */
- facets?: FacetsSearch;
-
- /**
- * Filter the search results.
- *
- * @example
- * // Search for documents that contain 'Headphones' in the 'description' and 'title' fields and
- * // have a price less than 100.
- *
- * const result = await search(db, {
- * term: 'Headphones',
- * properties: ['description', 'title'],
- * where: {
- * price: {
- * lt: 100
- * }
- * }
- * });
- */
- where?: WhereFilter;
-};
+import { prioritizeTokenScores } from "../components/algorithms.js";
+import { getFacets } from "../components/facets.js";
+import { intersectFilteredIDs } from "../components/filters.js";
+import { createError } from "../errors.js";
+import {
+ BM25Params,
+ IndexMap,
+ Lyra,
+ OpaqueDocumentStore,
+ OpaqueIndex,
+ Result,
+ Results,
+ Schema,
+ SearchContext,
+ SearchParams,
+ TokenMap,
+} from "../types.js";
+import { getNanosecondsTime, sortTokenScorePredicate } from "../utils.js";
-export type SearchResult = {
- /**
- * The number of all the matched documents.
- */
- count: number;
- /**
- * An array of matched documents taking `limit` and `offset` into account.
- */
- hits: RetrievedDoc[];
- /**
- * The time taken to search.
- */
- elapsed: bigint | string;
- /**
- * The facets results.
- */
- facets?: FacetReturningValue;
+const defaultBM25Params: BM25Params = {
+ k: 1.2,
+ b: 0.75,
+ d: 0.5,
};
-/**
- * Searches for documents in a database.
- * @param lyra The database to search.
- * @param params The search query.
- * @param language Optional parameter to override the default language analyzer.
- * @example
- * // Search for documents that contain 'Michael' in the 'author' field.
- * const result = await search(db, {
- * term: 'Michael',
- * properties: ['author']
- * });
- */
-export async function search(
- lyra: Lyra,
- params: SearchParams,
- language?: Language,
-): Promise> {
- if (!language) {
- language = lyra.defaultLanguage;
- }
-
- if (!lyra.components?.tokenizer) {
- lyra.components = {
- ...(lyra.components ?? {}),
- tokenizer: defaultTokenizerConfig(language),
- };
- }
-
- params.relevance = getBM25Parameters(params.relevance);
-
- const shouldCalculateFacets = params.facets && Object.keys(params.facets).length > 0;
- const { limit = 10, offset = 0, exact = false, term, properties } = params;
- const tokens = lyra.components.tokenizer!.tokenizerFn!(term, language, false, lyra.components.tokenizer!);
- const indices = getIndices(lyra, properties);
- const results: RetrievedDoc[] = Array.from({
- length: limit,
- });
- const N = lyra.docsCount;
-
- const timeStart = getNanosecondsTime();
-
+function createSearchContext(
+ properties: string[],
+ tokens: string[],
+ params: SearchParams,
+ docsCount: number,
+): SearchContext {
// If filters are enabled, we need to get the IDs of the documents that match the filters.
- const hasFilters = Object.keys(params.where ?? {}).length > 0;
- let whereFiltersIDs: string[] = [];
+ // const hasFilters = Object.keys(params.where ?? {}).length > 0;
+ // let whereFiltersIDs: string[] = [];
- if (hasFilters) {
- whereFiltersIDs = getWhereFiltersIDs(params.where!, lyra);
- }
-
- // uniqueDocsIDs contains unique document IDs for all the tokens in all the indices.
- const uniqueDocsIDs: Record = {};
+ // if (hasFilters) {
+ // whereFiltersIDs = getWhereFiltersIDs(params.where!, lyra);
+ // }
// indexMap is an object containing all the indexes considered for the current search,
// and an array of doc IDs for each token in all the indices.
@@ -213,6 +51,7 @@ export async function search(
// }
// }
const indexMap: IndexMap = {};
+
// After we create the indexMap, we need to calculate the intersection
// between all the postings lists for each token.
// Given the example above, docsIntersection will look like this:
@@ -224,85 +63,109 @@ export async function search(
// as doc2 is the only document present in all the postings lists for the "description" index.
const docsIntersection: TokenMap = {};
- for (const index of indices) {
+ for (const prop of properties) {
const tokensMap: TokenMap = {};
for (const token of tokens) {
tokensMap[token] = [];
}
- indexMap[index] = tokensMap;
- docsIntersection[index] = [];
+ indexMap[prop] = tokensMap;
+ docsIntersection[prop] = [];
}
- // Now it's time to loop over all the indices and get the documents IDs for every single term
- const indexesLength = indices.length;
- for (let i = 0; i < indexesLength; i++) {
- const index = indices[i];
- const avgFieldLength = lyra.avgFieldLength[index];
- const fieldLengths = lyra.fieldLengths[index];
+ return {
+ timeStart: getNanosecondsTime(),
+ params,
+ docsCount,
+ uniqueDocsIDs: {},
+ indexMap,
+ docsIntersection,
+ };
+}
- if (!(index in lyra.tokenOccurrencies)) continue;
+export async function search(
+ lyra: Lyra,
+ params: SearchParams,
+ language?: string,
+): Promise {
+ params.relevance = Object.assign(params.relevance ?? {}, defaultBM25Params);
- const lyraOccurrencies = lyra.tokenOccurrencies[index];
- const lyraFrequencies = lyra.frequencies[index];
+ const shouldCalculateFacets = params.facets && Object.keys(params.facets).length > 0;
+ const { limit = 10, offset = 0, term, properties } = params;
+
+ const { index, docs } = lyra.data;
+ const tokens = lyra.tokenizer.tokenize(term, language);
+
+ // Get searchable string properties
+ let propertiesToSearch = lyra.caches["propertiesToSearch"] as string[];
+ if (!propertiesToSearch) {
+ const propertiesToSearchWithTypes = await lyra.index.getSearchablePropertiesWithTypes(index);
+
+ propertiesToSearch = await lyra.index.getSearchableProperties(index);
+ propertiesToSearch = propertiesToSearch.filter((prop: string) => propertiesToSearchWithTypes[prop] === "string");
+
+ lyra.caches["propertiesToSearch"] = propertiesToSearch;
+ }
+
+ if (properties && properties !== "*") {
+ for (const prop of properties) {
+ if (!propertiesToSearch.includes(prop)) {
+ throw createError("UNKNOWN_INDEX", prop, propertiesToSearch.join(", "));
+ }
+ }
+
+ propertiesToSearch = propertiesToSearch.filter((prop: string) => properties.includes(prop));
+ }
+
+ // Create the search context and the results
+ const context = createSearchContext(propertiesToSearch, tokens, params, await lyra.documentsStore.count(docs));
+ const results: Result[] = Array.from({
+ length: limit,
+ });
+
+ // If filters are enabled, we need to get the IDs of the documents that match the filters.
+ const hasFilters = Object.keys(params.where ?? {}).length > 0;
+ let whereFiltersIDs: string[] = [];
+
+ if (hasFilters) {
+ whereFiltersIDs = lyra.index.searchByWhereClause(index, params.where!);
+ }
+
+ // Now it's time to loop over all the indices and get the documents IDs for every single term
+ const indexesLength = propertiesToSearch.length;
+ for (let i = 0; i < indexesLength; i++) {
+ const prop = propertiesToSearch[i];
const tokensLength = tokens.length;
for (let j = 0; j < tokensLength; j++) {
const term = tokens[j];
- // Here we get a TypeScript error: Type instantiation is excessively deep and possibly infinite.
- // Type definition is correct, but TypeScript is not able to infer the type recursively.
- // eslint-disable-next-line @typescript-eslint/ban-ts-comment
- // @ts-ignore
- const documentIDs = getDocumentIDsFromSearch(lyra, { ...params, index, term, exact });
-
- // lyraOccurrencies[term] can be undefined, 0, string, or { [k: string]: number }
- const termOccurrencies = typeof lyraOccurrencies[term] === "number" ? lyraOccurrencies[term] ?? 0 : 0;
-
- const scoreList: TokenScore[] = [];
-
- // Calculate TF-IDF value for each term, in each document, for each index.
- // Then insert sorted results into orderedTFIDFList.
- const documentIDsLength = documentIDs.length;
- for (let k = 0; k < documentIDsLength; k++) {
- const id = documentIDs[k];
- const tf = lyraFrequencies?.[id]?.[term] ?? 0;
-
- const bm25 = BM25(
- tf,
- termOccurrencies,
- N,
- fieldLengths[id],
- avgFieldLength,
- params.relevance as BM25Params,
- );
-
- scoreList.push([id, bm25]);
- }
+ // Lookup
+ const scoreList = await lyra.index.search(index, prop, term, context);
- indexMap[index][term].push(...scoreList);
+ context.indexMap[prop][term].push(...scoreList);
}
- const docIds = indexMap[index];
+ const docIds = context.indexMap[prop];
const vals = Object.values(docIds);
- docsIntersection[index] = prioritizeTokenScores(vals, params?.boost?.[index] ?? 1);
- const uniqueDocs = docsIntersection[index];
+ context.docsIntersection[prop] = prioritizeTokenScores(vals, params?.boost?.[prop] ?? 1);
+ const uniqueDocs = context.docsIntersection[prop];
const uniqueDocsLength = uniqueDocs.length;
for (let i = 0; i < uniqueDocsLength; i++) {
- const [id, tfIdfScore] = uniqueDocs[i];
+ const [id, score] = uniqueDocs[i];
- const prevScore = uniqueDocsIDs[id];
+ const prevScore = context.uniqueDocsIDs[id];
if (prevScore) {
- uniqueDocsIDs[id] = prevScore + tfIdfScore + 0.5;
+ context.uniqueDocsIDs[id] = prevScore + score + 0.5;
} else {
- uniqueDocsIDs[id] = tfIdfScore;
+ context.uniqueDocsIDs[id] = score;
}
}
}
// Get unique doc IDs from uniqueDocsIDs map, sorted by value.
- let uniqueDocsArray = Object.entries(uniqueDocsIDs).sort(sortTokenScorePredicate);
-
+ let uniqueDocsArray = Object.entries(context.uniqueDocsIDs).sort(sortTokenScorePredicate);
+
// If filters are enabled, we need to remove the IDs of the documents that don't match the filters.
if (hasFilters) {
uniqueDocsArray = intersectFilteredIDs(whereFiltersIDs, uniqueDocsArray);
@@ -310,7 +173,7 @@ export async function search(
const resultIDs: Set = new Set();
// Populate facets if needed
- const facets = shouldCalculateFacets ? getFacets(lyra.schema, lyra.docs, uniqueDocsArray, params.facets!) : {};
+ const facets = shouldCalculateFacets ? await getFacets(lyra, uniqueDocsArray, params.facets!) : {};
// We already have the list of ALL the document IDs containing the search terms.
// We loop over them starting from a positional value "offset" and ending at "offset + limit"
@@ -328,20 +191,14 @@ export async function search(
if (!resultIDs.has(id)) {
// We retrieve the full document only AFTER making sure that we really want it.
// We never retrieve the full document preventively.
- const fullDoc = lyra.docs[id]!;
- results[i] = { id, score, document: fullDoc };
+ const fullDoc = await lyra.documentsStore.get(docs, id);
+ results[i] = { id, score, document: fullDoc! };
resultIDs.add(id);
}
}
- let elapsed: bigint | string = getNanosecondsTime() - timeStart;
-
- if (lyra.components.elapsed?.format === "human") {
- elapsed = formatNanoseconds(elapsed);
- }
-
- const searchResult: SearchResult = {
- elapsed,
+ const searchResult: Results = {
+ elapsed: await lyra.formatElapsedTime(getNanosecondsTime() - context.timeStart),
hits: results.filter(Boolean),
count: uniqueDocsArray.length,
};
@@ -352,35 +209,3 @@ export async function search(
return searchResult;
}
-
-function getDocumentIDsFromSearch(
- lyra: Lyra,
- params: SearchParams & { index: string },
-): string[] {
- const idx = lyra.index[params.index];
- const searchResult = radixFind(idx as RadixNode, {
- term: params.term,
- exact: params.exact,
- tolerance: params.tolerance,
- });
-
- const ids = new Set