Get rid of high CPU load
fat-fellow committed Sep 2, 2024
1 parent c0c8267 commit aa8e992
Showing 12 changed files with 264 additions and 247 deletions.
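The theme of the commit: the bare Index pointer that used to cross the FFI boundary is replaced by a TantivyContext that owns the index together with one long-lived IndexWriter and one long-lived IndexReader. Before this change, delete_docs built a fresh IndexWriter and search built a fresh IndexReader on every call (see rust/src/c_util/util.rs below); constructing either is expensive in tantivy, which is the high CPU load the commit title refers to. A sketch of the new context type, inferred from how this diff constructs and uses it (TantivyContext::new(index, writer, reader), context.index, context.writer, context.reader()); the actual definition lives in rust/src/tantivy_util and is not among the hunks shown:

    use tantivy::{Index, IndexReader, IndexWriter, TantivyDocument};

    pub struct TantivyContext {
        pub index: Index,
        pub writer: IndexWriter<TantivyDocument>,
        reader: IndexReader,
    }

    impl TantivyContext {
        pub fn new(index: Index, writer: IndexWriter<TantivyDocument>, reader: IndexReader) -> Self {
            Self { index, writer, reader }
        }

        // search() borrows the cached reader instead of opening a new one per query.
        pub fn reader(&self) -> &IndexReader {
            &self.reader
        }
    }

Every C entry point is renamed from index_* to context_* to match, and the Go wrappers now pass a *TantivyContext.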
2 changes: 1 addition & 1 deletion binding_typedefs.h
@@ -1,4 +1,4 @@
-typedef struct Index Index;
+typedef struct TantivyContext TantivyContext;
 typedef struct SchemaBuilder SchemaBuilder;
 typedef struct Document Document;
 typedef struct SearchResult SearchResult;
96 changes: 50 additions & 46 deletions bindings.h
@@ -10,6 +10,8 @@ typedef struct Document Document;
 
 typedef struct SearchResult SearchResult;
 
+typedef struct TantivyContext TantivyContext;
+
 SchemaBuilder *schema_builder_new(void);
 
 void schema_builder_add_text_field(SchemaBuilder *builder_ptr,
@@ -23,54 +25,56 @@ void schema_builder_add_text_field(SchemaBuilder *builder_ptr,
 
 Schema *schema_builder_build(SchemaBuilder *builder_ptr, char **error_buffer);
 
-Index *index_create_with_schema(const char *path_ptr, Schema *schema_ptr, char **error_buffer);
-
-void index_register_text_analyzer_ngram(Index *index_ptr,
-                                        const char *tokenizer_name_ptr,
-                                        uintptr_t min_gram,
-                                        uintptr_t max_gram,
-                                        bool prefix_only,
-                                        char **error_buffer);
-
-void index_register_text_analyzer_edge_ngram(Index *index_ptr,
-                                             const char *tokenizer_name_ptr,
-                                             uintptr_t min_gram,
-                                             uintptr_t max_gram,
-                                             uintptr_t limit,
-                                             char **error_buffer);
-
-void index_register_text_analyzer_simple(Index *index_ptr,
-                                         const char *tokenizer_name_ptr,
-                                         uintptr_t text_limit,
-                                         const char *lang_str_ptr,
-                                         char **error_buffer);
-
-void index_register_text_analyzer_raw(Index *index_ptr,
-                                      const char *tokenizer_name_ptr,
-                                      char **error_buffer);
-
-void index_add_and_consume_documents(Index *index_ptr,
-                                     struct Document **docs_ptr,
-                                     uintptr_t docs_len,
-                                     char **error_buffer);
-
-void index_delete_documents(Index *index_ptr,
-                            const char *field_name_ptr,
-                            const char **delete_ids_ptr,
-                            uintptr_t delete_ids_len,
-                            char **error_buffer);
-
-uint64_t index_num_docs(Index *index_ptr, char **error_buffer);
-
-struct SearchResult *index_search(Index *index_ptr,
-                                  const char **field_names_ptr,
-                                  uintptr_t field_names_len,
-                                  const char *query_ptr,
-                                  char **error_buffer,
-                                  uintptr_t docs_limit,
-                                  bool with_highlights);
-
-void index_free(Index *index_ptr);
+struct TantivyContext *context_create_with_schema(const char *path_ptr,
+                                                  Schema *schema_ptr,
+                                                  char **error_buffer);
+
+void context_register_text_analyzer_ngram(struct TantivyContext *context_ptr,
+                                          const char *tokenizer_name_ptr,
+                                          uintptr_t min_gram,
+                                          uintptr_t max_gram,
+                                          bool prefix_only,
+                                          char **error_buffer);
+
+void context_register_text_analyzer_edge_ngram(struct TantivyContext *context_ptr,
+                                               const char *tokenizer_name_ptr,
+                                               uintptr_t min_gram,
+                                               uintptr_t max_gram,
+                                               uintptr_t limit,
+                                               char **error_buffer);
+
+void context_register_text_analyzer_simple(struct TantivyContext *context_ptr,
+                                           const char *tokenizer_name_ptr,
+                                           uintptr_t text_limit,
+                                           const char *lang_str_ptr,
+                                           char **error_buffer);
+
+void context_register_text_analyzer_raw(struct TantivyContext *context_ptr,
+                                        const char *tokenizer_name_ptr,
+                                        char **error_buffer);
+
+void context_add_and_consume_documents(struct TantivyContext *context_ptr,
+                                       struct Document **docs_ptr,
+                                       uintptr_t docs_len,
+                                       char **error_buffer);
+
+void context_delete_documents(struct TantivyContext *context_ptr,
+                              const char *field_name_ptr,
+                              const char **delete_ids_ptr,
+                              uintptr_t delete_ids_len,
+                              char **error_buffer);
+
+uint64_t context_num_docs(struct TantivyContext *context_ptr, char **error_buffer);
+
+struct SearchResult *context_search(struct TantivyContext *context_ptr,
+                                    const char **field_names_ptr,
+                                    uintptr_t field_names_len,
+                                    const char *query_ptr,
+                                    char **error_buffer,
+                                    uintptr_t docs_limit,
+                                    bool with_highlights);
+
+void context_free(struct TantivyContext *context_ptr);
 
 uintptr_t search_result_get_size(struct SearchResult *result_ptr, char **error_buffer);
 
@@ -85,7 +89,7 @@ struct Document *document_create(void);
 void document_add_field(struct Document *doc_ptr,
                         const char *field_name_ptr,
                         const char *field_value_ptr,
-                        Index *index_ptr,
+                        struct TantivyContext *context_ptr,
                         char **error_buffer);
 
 char *document_as_json(struct Document *doc_ptr,
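Every index_* declaration above becomes a context_* declaration taking struct TantivyContext *, and index_free becomes context_free. These C signatures are generated from #[no_mangle] extern "C" functions on the Rust side; as a hedged sketch (the implementing file is not among the hunks shown, and the error handling here is an assumption), the num-docs binding plausibly reduces to:

    use std::os::raw::c_char;

    #[no_mangle]
    pub unsafe extern "C" fn context_num_docs(
        context_ptr: *mut TantivyContext,
        error_buffer: *mut *mut c_char,
    ) -> u64 {
        // Sketch only: the real binding presumably null-checks context_ptr and
        // reports failures through error_buffer; both are elided here.
        let context = &mut *context_ptr;
        context.reader().searcher().num_docs()
    }

Note that the searcher now comes from the reader cached in the context, so calling context_num_docs in a loop no longer re-opens the index.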
4 changes: 2 additions & 2 deletions document.go
@@ -28,13 +28,13 @@ func NewDocument() *Document {
 //
 // Returns:
 // - error: an error if adding the field fails, or nil if the operation is successful
-func (d *Document) AddField(fieldName, fieldValue string, index *Index) error {
+func (d *Document) AddField(fieldName, fieldValue string, tc *TantivyContext) error {
 	cFieldName := C.CString(fieldName)
 	defer C.string_free(cFieldName)
 	cFieldValue := C.CString(fieldValue)
 	defer C.string_free(cFieldValue)
 	var errBuffer *C.char
-	C.document_add_field(d.ptr, cFieldName, cFieldValue, index.ptr, &errBuffer)
+	C.document_add_field(d.ptr, cFieldName, cFieldValue, tc.ptr, &errBuffer)
 
 	return tryExtractError(errBuffer)
 }
8 changes: 6 additions & 2 deletions example/main.go
@@ -13,7 +13,11 @@ const NameTitle = "title"
 
 func main() {
 	// Initialize the library
-	tantivy_go.LibInit("debug")
+	err := tantivy_go.LibInit("debug")
+	if err != nil {
+		fmt.Println("Failed to initialize library:", err)
+		return
+	}
 	// Create schema builder
 	builder, err := tantivy_go.NewSchemaBuilder()
 	if err != nil {
@@ -72,7 +76,7 @@ func main() {
 	}
 	// Create index with schema
 	_ = os.RemoveAll("index_dir")
-	index, err := tantivy_go.NewIndexWithSchema("index_dir", schema)
+	index, err := tantivy_go.NewTantivyContextWithSchema("index_dir", schema)
 	if err != nil {
 		fmt.Println("Failed to create index:", err)
 		return
2 changes: 1 addition & 1 deletion rust/src/c_util/mod.rs
@@ -8,7 +8,7 @@ pub use self::util::start_lib_init;
 pub use self::util::box_from;
 pub use self::util::add_and_consume_documents;
 pub use self::util::delete_docs;
-pub use self::util::create_index_with_schema;
+pub use self::util::create_context_with_schema;
 pub use self::util::search;
 pub use self::util::drop_any;
 pub use self::util::get_doc;
59 changes: 28 additions & 31 deletions rust/src/c_util/util.rs
@@ -3,15 +3,14 @@ use std::collections::HashMap;
 use std::ffi::{CStr, CString};
 use std::os::raw::c_char;
 use std::path::Path;
-
 use log::debug;
 use serde_json::json;
-use tantivy::{Index, IndexWriter, TantivyDocument, Term};
+use tantivy::{Index, IndexWriter, TantivyDocument, TantivyError, Term};
 use tantivy::directory::MmapDirectory;
 use tantivy::query::{QueryParser};
 use tantivy::schema::{Field, Schema};
 
-use crate::tantivy_util::{convert_document_to_json, Document, DOCUMENT_BUDGET_BYTES, find_highlights, get_string_field_entry, SearchResult};
+use crate::tantivy_util::{convert_document_to_json, Document, TantivyContext, DOCUMENT_BUDGET_BYTES, find_highlights, get_string_field_entry, SearchResult};
 
 pub fn set_error(err: &str, error_buffer: *mut *mut c_char) {
     let err_str = match CString::new(err) {
@@ -159,7 +158,7 @@ pub fn start_lib_init(log_level: &str) {
     ).try_init();
 }
 
-pub fn create_index_with_schema(error_buffer: *mut *mut c_char, schema: Schema, path: &str) -> Result<*mut Index, ()> {
+pub fn create_context_with_schema(error_buffer: *mut *mut c_char, schema: Schema, path: &str) -> Result<*mut TantivyContext, ()> {
     match fs::create_dir_all(Path::new(path)) {
         Err(e) => {
             debug!("Failed to create directories: {}", e);
@@ -177,30 +176,41 @@ pub fn create_index_with_schema(error_buffer: *mut *mut c_char, schema: Schema,
         }
     };
 
-    Ok(match Index::open_or_create(dir, schema) {
-        Ok(index) => Box::into_raw(Box::new(index)),
+    Ok(match create_tantivy_context(dir, schema) {
+        Ok(ctx) => Box::into_raw(Box::new(ctx)),
         Err(err) => {
             set_error(&err.to_string(), error_buffer);
             return Err(());
         }
     })
 }
 
+fn create_tantivy_context(dir: MmapDirectory, schema: Schema) -> Result<TantivyContext, TantivyError> {
+    let index = Index::open_or_create(dir, schema)?;
+    let writer = index.writer(DOCUMENT_BUDGET_BYTES)?;
+    let reader = index.reader()?;
+    return Ok(TantivyContext::new(
+        index,
+        writer,
+        reader,
+    ));
+}
+
 pub fn add_and_consume_documents(
     docs_ptr: *mut *mut Document,
     docs_len: usize,
     error_buffer: *mut *mut c_char,
-    mut index_writer: IndexWriter,
+    writer: &mut IndexWriter,
 ) {
     if process_type_slice(docs_ptr, error_buffer, docs_len, |doc| {
         let doc = *box_from(doc);
-        let _ = index_writer.add_document(doc.tantivy_doc);
+        let _ = writer.add_document(doc.tantivy_doc);
         Ok(())
     }).is_err() {
         return;
     }
 
-    if index_writer.commit().is_err() {
+    if writer.commit().is_err() {
         set_error("Failed to commit document", error_buffer)
     }
 }
@@ -209,15 +219,10 @@ pub fn delete_docs(
     delete_ids_ptr: *mut *const c_char,
     delete_ids_len: usize,
     error_buffer: *mut *mut c_char,
-    index: &mut Index,
+    context: &mut TantivyContext,
     field_name: &str,
 ) {
-    let mut index_writer: IndexWriter<TantivyDocument> = match index.writer(DOCUMENT_BUDGET_BYTES) {
-        Ok(writer) => writer,
-        Err(_) => return
-    };
-
-    let schema = index.schema();
+    let schema = context.index.schema();
 
     let field = match schema_apply_for_field::<Field, (), _>
         (error_buffer, schema.clone(), field_name, |field, _| {
@@ -231,13 +236,13 @@ pub fn delete_docs(
     };
 
     if process_string_slice(delete_ids_ptr, error_buffer, delete_ids_len, |id_value| {
-        let _ = index_writer.delete_term(Term::from_field_text(field, id_value));
+        let _ = context.writer.delete_term(Term::from_field_text(field, id_value));
         Ok(())
     }).is_err() {
         return;
     }
 
-    if index_writer.commit().is_err() {
+    if context.writer.commit().is_err() {
         set_error("Failed to commit removing", error_buffer)
     }
 }
@@ -259,7 +264,7 @@ pub fn get_doc<'a>(
 pub fn add_field(
     error_buffer: *mut *mut c_char,
     doc: &mut Document,
-    index: &mut Index,
+    index: &Index,
     field_name: &str,
     field_value: &str,
 ) {
@@ -281,19 +286,11 @@ pub fn search(
     query_ptr: *const c_char,
     error_buffer: *mut *mut c_char,
     docs_limit: usize,
-    index: &mut Index,
+    context: &mut TantivyContext,
     with_highlights: bool,
 ) -> Result<*mut SearchResult, ()> {
-    let reader = match index.reader() {
-        Ok(reader) => reader,
-        Err(err) => {
-            set_error(&err.to_string(), error_buffer);
-            return Err(());
-        }
-    };
-
-    let searcher = reader.searcher();
-    let schema = index.schema();
+    let searcher = &context.reader().searcher();
+    let schema = context.index.schema();
 
     let mut fields = Vec::with_capacity(field_names_len);
 
@@ -312,7 +309,7 @@ pub fn search(
         None => return Err(())
     };
 
-    let query_parser = QueryParser::for_index(index, fields);
+    let query_parser = QueryParser::for_index(&context.index, fields);
 
     let query = match query_parser.parse_query(query) {
         Ok(query) => query,
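The util.rs changes above are the heart of the fix. The removed match index.writer(DOCUMENT_BUDGET_BYTES) block in delete_docs and the removed match index.reader() block in search meant that every delete spun up a new IndexWriter (which allocates the indexing memory budget and spawns worker threads) and every search built a new IndexReader (which loads searchable segments and, with the default reload policy, sets up commit watching). Doing that per call is the likely source of the reported CPU load; create_tantivy_context now pays those costs once. A minimal before/after sketch of the read path, assuming plain tantivy API and ignoring error plumbing:

    use tantivy::{Index, IndexReader, TantivyError};

    // Before this commit: every call paid for a brand-new IndexReader.
    fn doc_count_per_call(index: &Index) -> Result<u64, TantivyError> {
        let reader = index.reader()?; // constructed and torn down each time
        Ok(reader.searcher().num_docs())
    }

    // After this commit: the reader lives in TantivyContext; each call just
    // takes a fresh Searcher from it, which is cheap.
    fn doc_count_cached(reader: &IndexReader) -> u64 {
        reader.searcher().num_docs()
    }

The writer side is analogous: commits still happen per batch of added or deleted documents, but the writer's thread pool is created once per context instead of once per delete_documents call.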