Skip to content

Commit

Permalink
client
Browse files Browse the repository at this point in the history
  • Loading branch information
tomfran committed Jan 20, 2024
1 parent c27320f commit 02a1cd7
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 60 deletions.
12 changes: 5 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,21 @@ Search engine written in Rust, based on an inverted index on disk.
**Index construction**
- [x] In-memory datasets index construction;
- [x] Proper vocabulary and paths on disk;
- [ ] Spelling correction index;
- [ ] Disk-based partial index construction and merging;
- [ ] Spelling correction index.

**Queries**
- [x] Tf-idf ranked retrieval;
- [x] Window computation.
- [ ] Boolean queries;
- [ ] Parallel scoring.
- [x] Window computation;
- [ ] File content retrieval.

**Evaluation**
- [ ] Query speed;
- [ ] Query quality;
- [ ] Disk overhead.

**Client**
- [x] CLI
- [ ] Web interface
- [x] CLI;
- [ ] Web interface.

## Crates in use
- [stemmer-rs](https://github.com/lise-henry/stemmer-rs)
Expand Down
6 changes: 5 additions & 1 deletion client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
actix-web = "4.4.1"
askama = "0.12.1"
axum = "0.7.4"
env_logger = "0.11.0"
log = "0.4.20"
search = { path = "../search" }
serde = "1.0.195"
tokio = { version = "1.35.1", features = ["macros", "rt-multi-thread"] }
147 changes: 95 additions & 52 deletions client/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,77 +1,120 @@
use actix_web::{post, web, App, HttpServer, Responder, Result};
use askama::Template;
use axum::{
extract::{Json, State},
http::StatusCode,
response::{Html, IntoResponse, Response},
routing::{get, post},
Router,
};
use log::info;
use search::query::QueryProcessor;
use serde::{Deserialize, Serialize};
use std::{env, sync::Mutex, time::Instant};
use std::{
env,
sync::{Arc, Mutex},
time::Instant,
};

/// Shared application state, wrapped in an `Arc` and attached to the axum
/// router via `with_state` in `main`.
struct AppState {
// Guarded by a mutex: the `/query` handler locks it and binds the guard
// mutably before calling `query` on the processor.
query_processor: Mutex<QueryProcessor>,
}

/// Entry point of the search web client.
///
/// Expects the base data path as the first command-line argument. The index is
/// loaded from `<base_path>/index/index` and the tokenizer from
/// `<base_path>/tokenizer/bert-base-uncased`. Once the query processor is
/// built, the server listens on `0.0.0.0:3000` and serves `GET /` and
/// `POST /query`.
///
/// # Panics
///
/// Panics if the TCP listener cannot be bound or if the server terminates
/// with an error.
#[tokio::main]
async fn main() {
    // Default the log level to `info`, but honour a `RUST_LOG` value already
    // provided by the user. This replaces `std::env::set_var`, which would
    // clobber the user's setting and mutates the process environment after
    // the multi-threaded runtime has already started.
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();

    // The first positional argument is the base path; bail out with a usage
    // hint when it is missing.
    let Some(base_path) = env::args().nth(1) else {
        println!("Usage: cargo run --bin client <base_path>");
        return;
    };

    let index_path = format!("{}/index/index", base_path);
    let tokenizer_path = format!("{}/tokenizer/bert-base-uncased", base_path);

    // The query processor is built once and shared by every request handler.
    let state = Arc::new(AppState {
        query_processor: Mutex::new(QueryProcessor::build_query_processor(
            &index_path,
            &tokenizer_path,
        )),
    });

    let app = Router::new()
        .route("/", get(root))
        .route("/query", post(post_query))
        .with_state(state);

    let listener = tokio::net::TcpListener::bind("0.0.0.0:3000")
        .await
        .expect("failed to bind TCP listener on 0.0.0.0:3000");

    info!("Application started");
    axum::serve(listener, app)
        .await
        .expect("server terminated with an error");
}

/// Handler registered for `GET /`; currently an empty placeholder that does
/// no work and returns unit.
async fn root() {}

/// JSON body accepted by the `POST /query` handler. `Debug` is derived so the
/// handler can log the incoming request.
#[derive(Deserialize, Debug)]
struct QueryRequest {
// Query text, forwarded unchanged as the first argument of
// `QueryProcessor::query`.
query: String,
// Forwarded as the second argument of `QueryProcessor::query`; presumably
// the maximum number of results to return — confirm against the `search` crate.
limit: usize,
}

#[derive(Serialize)]
#[derive(Template)]
#[template(path = "query.html")]
struct QueryResponse {
num_results: u32,
time_ms: u128,
documents: Vec<QueryDocumentResponse>,
documents: Vec<Document>,
}

#[derive(Serialize)]
struct QueryDocumentResponse {
#[derive(Serialize, Deserialize)]
struct Document {
id: u32,
score: f32,
path: String,
}

#[post("/query")]
async fn query(
r: web::Json<QueryRequest>,
q: web::Data<Mutex<QueryProcessor>>,
) -> Result<impl Responder> {
println!("query: {:?}", r);
struct HtmlTemplate<T>(T);

let mut local_q = q.lock().unwrap();
impl<T> IntoResponse for HtmlTemplate<T>
where
T: Template,
{
fn into_response(self) -> Response {
match self.0.render() {
Ok(html) => Html(html).into_response(),

let start_time = Instant::now();
let result = local_q.query(&r.query, r.limit);
let elapsed_time = start_time.elapsed();

let response = QueryResponse {
num_results: result.len() as u32,
time_ms: elapsed_time.as_millis(),
documents: result
.iter()
.map(|e| QueryDocumentResponse {
id: e.id,
score: e.score,
path: e.path.clone(),
})
.collect(),
};

Ok(web::Json(response))
Err(err) => (
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to render template. Error: {}", err),
)
.into_response(),
}
}
}

#[actix_web::main]
async fn main() -> std::io::Result<()> {
let args: Vec<String> = env::args().collect();
if args.len() < 2 {
println!("Usage: cargo run --bin client <base_path>");
return Ok(());
}
async fn post_query(
State(state): State<Arc<AppState>>,
Json(payload): Json<QueryRequest>,
) -> impl IntoResponse {
info!("Query request: {:?}", payload);

let base_path = &args[1];
let index_path = format!("{}/index/index", base_path);
let tokenizer_path = format!("{}/tokenizer/bert-base-uncased", base_path);
let mut q = state.query_processor.lock().unwrap();

let start_time = Instant::now();
let query_result = q.query(&payload.query, payload.limit);
let time_ms = start_time.elapsed().as_millis();

let documents = query_result
.iter()
.map(|r| Document {
id: r.id,
score: r.score,
path: r.path.clone(),
})
.collect();

HttpServer::new(move || {
App::new()
.app_data(web::Data::new(Mutex::new(
QueryProcessor::build_query_processor(&index_path, &tokenizer_path),
)))
.service(query)
})
.bind(("127.0.0.1", 8080))?
.run()
.await
HtmlTemplate(QueryResponse { time_ms, documents })
}
11 changes: 11 additions & 0 deletions client/templates/query.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<h1>Query result</h1>

<p>
Query completed in {{ time_ms }} ms
</p>

<ul>
{% for doc in documents %}
<li>{{ loop.index }} - {{ doc.path }}</li>
{% endfor %}
</ul>

0 comments on commit 02a1cd7

Please sign in to comment.