Skip to content

Commit

Permalink
reorg
Browse files Browse the repository at this point in the history
  • Loading branch information
tomfran committed Jan 20, 2024
1 parent 6b1b43e commit 603ed2e
Show file tree
Hide file tree
Showing 25 changed files with 73 additions and 42 deletions.
6 changes: 1 addition & 5 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,7 @@ Cargo.lock
/target

# Data files
/data/wiki-data
/data/index_unit_test/index
/data/test
/data/small
/data/illinois
/data

# TODO
todo.md
21 changes: 5 additions & 16 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,17 +1,6 @@
[package]
name = "search-rs"
version = "0.1.0"
edition = "2021"
[workspace]

[lib]
name = "search"
path = "src/lib.rs"

[dependencies]
rand = "0.8"
tokenizers = { version = "0.15.0", features = ["http"] }
rust-stemmers = "1.2.0"
rayon = "1.8.0"
indicatif = {version = "0.17.0", features = ["rayon", "improved_unicode"]}
fxhash = "0.2.1"
tempdir = "0.3.7"
members = [
"search",
"client"
]
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Search engine written in Rust, based on an inverted index on disk.

**Index construction**
- [x] In-memory datasets index construction;
- [ ] Proper vocabulary and paths on disk;
- [x] Proper vocabulary and paths on disk;
- [ ] Spelling correction index;
- [ ] Disk-based partial index construction and merging;

Expand All @@ -30,6 +30,10 @@ Search engine written in Rust, based on an inverted index on disk.
- [ ] Query quality;
- [ ] Disk overhead.

**Client**
- [x] CLI;
- [ ] Web interface.

## Crates in use
- [stemmer-rs](https://github.com/lise-henry/stemmer-rs)
- [tokenizers](https://github.com/huggingface/tokenizers)
Expand Down
10 changes: 10 additions & 0 deletions client/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[package]
name = "client"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
actix-web = "4.4.1"
search = { path = "../search" }
28 changes: 28 additions & 0 deletions client/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
use actix_web::{get, post, web, App, HttpResponse, HttpServer, Responder};

/// Handler for `GET /`.
///
/// Replies with an `Ok` response whose body is the fixed text `Hello world!`.
#[get("/")]
async fn hello() -> impl Responder {
    let mut builder = HttpResponse::Ok();
    builder.body("Hello world!")
}

/// Handler for `POST /echo`.
///
/// Takes the request body as a `String` and sends it back unchanged as the
/// body of an `Ok` response.
#[post("/echo")]
async fn echo(req_body: String) -> impl Responder {
    let mut builder = HttpResponse::Ok();
    builder.body(req_body)
}

/// Handler without a routing attribute; it has to be attached to a path
/// explicitly (`main` registers it for `GET /hey`).
///
/// Replies with an `Ok` response whose body is the fixed text `Hey there!`.
async fn manual_hello() -> impl Responder {
    let greeting = "Hey there!";
    HttpResponse::Ok().body(greeting)
}

/// Entry point: starts the HTTP server on `127.0.0.1:8080`.
///
/// Registers the `hello` and `echo` services and routes `GET /hey` to
/// `manual_hello`.
///
/// # Errors
///
/// Returns the `std::io::Error` produced when binding the address fails, or
/// the one the running server resolves to.
#[actix_web::main]
async fn main() -> std::io::Result<()> {
    // Application factory handed to `HttpServer::new`; registration order is
    // kept exactly as before: `hello`, `echo`, then the manual `/hey` route.
    let build_app = || {
        let hey_route = web::get().to(manual_hello);
        App::new()
            .service(hello)
            .service(echo)
            .route("/hey", hey_route)
    };

    let listen_addr: (&str, u16) = ("127.0.0.1", 8080);
    let server = HttpServer::new(build_app).bind(listen_addr)?;

    server.run().await
}
14 changes: 14 additions & 0 deletions search/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[package]
name = "search"
version = "0.1.0"
edition = "2021"


[dependencies]
rand = "0.8"
tokenizers = { version = "0.15.0", features = ["http"] }
rust-stemmers = "1.2.0"
rayon = "1.8.0"
indicatif = {version = "0.17.0", features = ["rayon", "improved_unicode"]}
fxhash = "0.2.1"
tempdir = "0.3.7"
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
16 changes: 3 additions & 13 deletions src/index/mod.rs → search/src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,9 @@ mod test {
fn test_build() {
let index_path = &create_temporary_dir_path();

Index::build_index(
"data/index_unit_test/docs",
index_path,
"data/index_unit_test/test_tokenizer",
);
Index::build_index("test_data/docs", index_path, "test_data/test_tokenizer");

let mut idx = Index::load_index(index_path, "data/index_unit_test/test_tokenizer");
let mut idx = Index::load_index(index_path, "test_data/test_tokenizer");

for ele in ["hello", "man", "world"] {
assert!(idx.term_to_index.contains_key(ele));
Expand All @@ -119,13 +115,7 @@ mod test {

hello_docs.sort();

assert_eq!(
hello_docs,
[
"data/index_unit_test/docs/1.txt",
"data/index_unit_test/docs/2.txt"
]
);
assert_eq!(hello_docs, ["test_data/docs/1.txt", "test_data/docs/2.txt"]);

assert_eq!(pl.collection_frequency, 2);

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
14 changes: 7 additions & 7 deletions src/main.rs → search/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::env;
use std::io::{self, Write};
use std::process::Command;
use std::process::{exit, Command};
use std::time::{Duration, Instant};

use search::index::Index;
Expand Down Expand Up @@ -53,10 +53,7 @@ fn main() {
let args: Vec<String> = env::args().collect();

if args.len() < 3 || args.len() > 5 {
println!(
"Usage: {} <base_path> <load_or_build> [build_num_threads]",
args[0]
);
println!("Usage: cargo run --bin search <base_path> <load_or_build> [build_num_threads]");
return;
}

Expand Down Expand Up @@ -87,9 +84,12 @@ fn main() {
Index::build_index(&docs_path, &index_path, &tokenizer_path);
let elapsed_time = start_time.elapsed();
println!(
"Index built in {}.\n",
HumanDuration(Duration::from_secs(elapsed_time.as_secs()))
"Index built in {}.\n\nLoad options:\n- CLI: cargo run --release --bin search {} load",
HumanDuration(Duration::from_secs(elapsed_time.as_secs())),
base_path
);

exit(0);
}

let mut q = QueryProcessor::build_query_processor(&index_path, &tokenizer_path);
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 comments on commit 603ed2e

Please sign in to comment.