Skip to content

Commit

Permalink
feat: adds search hits, elapsed time, and count
Browse files Browse the repository at this point in the history
  • Loading branch information
micheleriva committed May 17, 2022
1 parent 6fac8f2 commit b2a23c0
Show file tree
Hide file tree
Showing 12 changed files with 28,449 additions and 26 deletions.
4 changes: 3 additions & 1 deletion .eslintignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ jest.config.js
.eslintrc.cjs
*.yaml
*.md
*.snap
*.snap
*.csv
*.sh
7 changes: 7 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,12 @@
"ts-jest": "^28.0.2",
"ts-node": "^10.7.0",
"typescript": "^4.6.4"
},
"pnpm": {
"peerDependencyRules": {
"ignoreMissing": [
"typescript"
]
}
}
}
1 change: 1 addition & 0 deletions packages/lyra/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"bugs": {
"url": "https://github.com/nearform/lyra"
},
"main": "./src/lyra.ts",
"dependencies": {
"fastq": "^1.13.0",
"nanoid": "3.3.4"
Expand Down
29 changes: 20 additions & 9 deletions packages/lyra/src/lyra.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import * as fastq from "fastq";
import { Trie } from "./prefix-tree/trie";
import * as ERRORS from "./errors";
import { tokenize } from "./tokenizer";
import { formatNanoseconds } from "./utils";

export type PropertyType = "string" | "number" | "boolean";

Expand Down Expand Up @@ -31,6 +32,12 @@ type QueueDocParams = {
doc: object;
};

/**
 * Promise returned by `Lyra.search`.
 *
 * - `elapsed`: time spent searching, as formatted by `formatNanoseconds`.
 * - `hits`: the portion of the collected results selected via the search
 *   params' `offset` / `limit`.
 * - `count`: number of results collected before that slicing is applied.
 */
type SearchResult = Promise<{
  elapsed: string;
  hits: object[];
  count: number;
}>;

export class Lyra {
private schema: PropertiesSchema;
private docs: LyraDocs = new Map();
Expand Down Expand Up @@ -60,10 +67,12 @@ export class Lyra {
}
}

async search(params: SearchParams) {
async search(params: SearchParams): SearchResult {
const tokens = tokenize(params.term).values();
const indices = this.getIndices(params.properties);
const results = [];
const results: object[] = [];

const timeStart = process.hrtime.bigint();

for (const token of tokens) {
for (const index of indices) {
Expand All @@ -81,7 +90,11 @@ export class Lyra {
}
}

return results;
return {
elapsed: formatNanoseconds(process.hrtime.bigint() - timeStart),
hits: results.slice(params.offset ?? 0, params.limit ?? 10), // @todo avoid getting all results and slicing them
count: results.length,
};
}

private getIndices(indices: SearchParams["properties"]): string[] {
Expand All @@ -108,15 +121,14 @@ export class Lyra {
return indices as string[];
}

private async _search(params: SearchParams & { index: string }) {
private async _search(
params: SearchParams & { index: string }
): Promise<object[]> {
const idx = this.index.get(params.index);
const searchResult = idx?.find(params.term);
const results = [];
let count = 0;
const results: object[] = [];

for (const key in searchResult) {
if (params.limit && count > params.limit) break;

const docs: string[] = [];

for (const id of (searchResult as any)[key]) {
Expand All @@ -126,7 +138,6 @@ export class Lyra {
docs.push({ id, ...fullDoc });
}

count++;
results.push(docs);
}

Expand Down
22 changes: 22 additions & 0 deletions packages/repl/commands.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import yargs from "yargs";

/**
 * Parses one REPL input line with yargs.
 *
 * Recognises the `search <text...>` command plus three options:
 * `--limit` / `-l` (number, default 10), `--offset` / `-o` (number,
 * default 0) and `--properties` / `-p` (string, default "*").
 *
 * @param input - The raw line typed by the user.
 * @returns Whatever yargs' `parse()` yields for that line.
 */
export function commands(input: string) {
  // `input` is a plain string; the cast presumably only satisfies the
  // array-typed yargs signature — confirm against the installed typings.
  const parser = yargs(input as unknown as string[])
    .command("search <text...>", "searches a text")
    .option("limit", { alias: "l", type: "number", default: 10 })
    .option("offset", { alias: "o", type: "number", default: 0 })
    .option("properties", { alias: "p", type: "string", default: "*" });

  return parser.parse();
}
28,228 changes: 28,228 additions & 0 deletions packages/repl/datasets/reviews.csv

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions packages/repl/datasets/reviews.json

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions packages/repl/get-datasets.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Downloads an IMDb dataset dump and keeps its first 1,000,000 lines in
# ./datasets/title.basics.short.tsv. All paths are relative, so run this from
# the directory that contains ./datasets.

# Stop at the first failing command (or unset variable) so a failed download
# never lets the later steps run against missing or stale files.
set -eu

# NOTE(review): this fetches title.episode.tsv.gz but stores it under the name
# title.basics.tsv.gz — confirm which dataset is actually intended.
wget https://datasets.imdbws.com/title.episode.tsv.gz -O ./datasets/title.basics.tsv.gz

# -f lets gzip overwrite a title.basics.tsv left behind by an interrupted run.
gzip -df ./datasets/title.basics.tsv.gz

# Truncate (>) instead of append (>>) so re-running does not duplicate rows.
head -n1000000 ./datasets/title.basics.tsv > ./datasets/title.basics.short.tsv
rm ./datasets/title.basics.tsv
24 changes: 24 additions & 0 deletions packages/repl/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"name": "@nearform/lyra-repl",
"description": "Repl for the Lyra search engine",
"version": "0.0.1",
"author": {
"name": "Michele Riva",
"email": "ciao@micheleriva.it",
"url": "https://github.com/MicheleRiva"
},
"bugs": {
"url": "https://github.com/nearform/lyra"
},
"dependencies": {
"@nearform/lyra": "workspace:^",
"csv": "^6.0.5",
"yargs": "^17.5.1"
},
"devDependencies": {
"@types/jest": "^27.5.0",
"@types/yargs": "^17.0.10",
"jest": "^28.1.0",
"ts-jest": "^28.0.2"
}
}
68 changes: 68 additions & 0 deletions packages/repl/repl.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import readline from "readline";
import { Lyra } from "@nearform/lyra";
import dataset from "./datasets/reviews.json";
import { commands } from "./commands";

/**
 * One record of the reviews dataset, keyed by the dataset's own column names.
 * Every column is typed as a string, including the unnamed first column ("").
 */
type Dataset = Record<
  | ""
  | "Rating"
  | "Review Text"
  | "Division Name"
  | "Title"
  | "Recommended IND"
  | "Age"
  | "Department Name"
  | "Class Name"
  | "Positive Feedback Count"
  | "Clothing ID",
  string
>;

// Line-based interface reading from stdin and writing to stdout.
// NOTE(review): per Node's readline docs, `terminal: false` means the streams
// are not treated as a TTY — confirm that is the intended REPL behaviour.
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout,
terminal: false,
});

// Lyra instance backing the REPL; its schema declares three string-typed
// properties: rating, review and title.
const db = new Lyra({
schema: {
rating: "string",
review: "string",
title: "string",
},
});

/**
 * Inserts every record of the imported dataset into `db`, mapping the
 * "Rating", "Review Text" and "Title" columns onto the schema's `rating`,
 * `review` and `title` properties. Each insert is awaited before the next
 * one starts.
 */
async function load() {
  const rows = dataset as Dataset[];

  for (const { Rating, "Review Text": reviewText, Title } of rows) {
    await db.insert({ rating: Rating, review: reviewText, title: Title });
  }
}

/**
 * Handles one line typed into the REPL: parses it with `commands`, runs the
 * search against `db`, and prints the result.
 *
 * This function is registered as a readline "line" listener, which ignores
 * the returned promise. It therefore never rejects: bad input and search
 * failures are reported on stderr instead of becoming unhandled rejections
 * that would take the whole REPL down.
 *
 * @param input - The raw line typed by the user.
 */
async function parseLine(input: string) {
  try {
    const cmd = await commands(input);

    // `text` is absent when the line is blank or is not a `search` command.
    const text: unknown = (cmd as any).text;
    if (!Array.isArray(text) || text.length === 0) {
      console.error("usage: search <text...> [-l limit] [-o offset] [-p properties]");
      return;
    }

    const tokens = text.join(", ");

    // "*" is passed through untouched; anything else is a comma-separated list.
    const properties = cmd.properties === "*" ? "*" : cmd.properties.split(",");

    const result = await db.search({
      term: tokens,
      limit: cmd.limit,
      offset: cmd.offset,
      properties,
    });

    console.log(result);
  } catch (error: unknown) {
    console.error(error instanceof Error ? error.message : error);
  }
}

/**
 * Boots the REPL: loads the dataset into the database, reports how many
 * reviews were loaded, and only then starts handling input lines.
 */
async function start() {
  console.log("loading dataset...");
  await load();

  const total = (dataset as any[]).length;
  console.log(`${total} reviews loaded`);

  // Each line read from the interface is handed to parseLine.
  rl.on("line", parseLine);
}

// Entry point: kick off loading as soon as the module is evaluated.
start();
Loading

0 comments on commit b2a23c0

Please sign in to comment.