Skip to content

Commit

Permalink
Add min version for ios simulator
Browse files Browse the repository at this point in the history
Add ascii folding
  • Loading branch information
fat-fellow committed Aug 26, 2024
1 parent 2d22721 commit ac86fb1
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 8 deletions.
12 changes: 6 additions & 6 deletions rust/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -85,42 +85,42 @@ install-debug-android-amd64: build-debug-android-amd64
@cp target/x86_64-linux-android/debug/libtantivy_go.a ../libs/android-amd64

build-ios-arm64:
cargo build --release --target aarch64-apple-ios
env IPHONEOS_DEPLOYMENT_TARGET=15.0 cargo build --release --target aarch64-apple-ios

install-ios-arm64: build-ios-arm64
@mkdir -p ../libs/ios-arm64
@cp target/aarch64-apple-ios/release/libtantivy_go.a ../libs/ios-arm64

build-debug-ios-arm64:
cargo build --target aarch64-apple-ios
env IPHONEOS_DEPLOYMENT_TARGET=15.0 cargo build --target aarch64-apple-ios

install-debug-ios-arm64: build-debug-ios-arm64
@mkdir -p ../libs/ios-arm64
@cp target/aarch64-apple-ios/debug/libtantivy_go.a ../libs/ios-arm64

build-ios-arm64-sim:
cargo build --release --target aarch64-apple-ios-sim
env IPHONEOS_DEPLOYMENT_TARGET=15.0 cargo build --release --target aarch64-apple-ios-sim

install-ios-arm64-sim: build-ios-arm64-sim
@mkdir -p ../libs/ios-arm64-sim
@cp target/aarch64-apple-ios-sim/release/libtantivy_go.a ../libs/ios-arm64-sim

build-debug-ios-arm64-sim:
cargo build --target aarch64-apple-ios-sim
env IPHONEOS_DEPLOYMENT_TARGET=15.0 cargo build --target aarch64-apple-ios-sim

install-debug-ios-arm64-sim: build-debug-ios-arm64-sim
@mkdir -p ../libs/ios-arm64-sim
@cp target/aarch64-apple-ios-sim/debug/libtantivy_go.a ../libs/ios-arm64-sim

build-ios-amd64:
cargo build --release --target x86_64-apple-ios
env IPHONEOS_DEPLOYMENT_TARGET=15.0 cargo build --release --target x86_64-apple-ios

install-ios-amd64: build-ios-amd64
@mkdir -p ../libs/ios-amd64
@cp target/x86_64-apple-ios/release/libtantivy_go.a ../libs/ios-amd64

build-debug-ios-amd64:
cargo build --target x86_64-apple-ios
env IPHONEOS_DEPLOYMENT_TARGET=15.0 cargo build --target x86_64-apple-ios

install-debug-ios-amd64: build-debug-ios-amd64
@mkdir -p ../libs/ios-amd64
Expand Down
68 changes: 68 additions & 0 deletions rust/src/tantivy_util/highlights.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,72 @@ pub fn find_highlights(
}
}
Ok(highlights)
}

#[cfg(test)]
mod tests {
    use tantivy::collector::TopDocs;
    use tantivy::query::QueryParser;
    use tantivy::schema::*;
    use tantivy::tokenizer::*;
    use tantivy::{doc, Index};

    /// Indexes "strasse" and "straße" with an analyzer that lower-cases and
    /// ASCII-folds tokens, then checks that a query for "straße" returns both
    /// documents.
    #[test]
    fn test_ascii_folding_filter() {
        // Define the schema: a single stored text field indexed through the
        // analyzer registered below under the name "custom".
        let mut schema_builder = Schema::builder();
        let text_options = (TEXT | STORED).set_indexing_options(
            TextFieldIndexing::default()
                .set_tokenizer("custom")
                .set_index_option(IndexRecordOption::WithFreqsAndPositions),
        );
        let text = schema_builder.add_text_field("text", text_options);
        let schema = schema_builder.build();

        // Create the in-memory index.
        let index = Index::create_in_ram(schema.clone());

        // Build the custom analyzer with AsciiFoldingFilter and register it
        // under the name the schema refers to.
        let tokenizer = TextAnalyzer::builder(SimpleTokenizer::default())
            .filter(LowerCaser)
            .filter(AsciiFoldingFilter)
            .build();
        index.tokenizers().register("custom", tokenizer);

        // Add the documents. A failed insert must fail the test right here
        // instead of surfacing later as a confusing hit-count mismatch.
        let mut index_writer = index.writer(50_000_000).unwrap();
        index_writer.add_document(doc!(text => "strasse")).unwrap();
        index_writer.add_document(doc!(text => "straße")).unwrap();
        index_writer.commit().unwrap();

        // The query parser is built from the index, so it uses the index's
        // registered tokenizers.
        let query_parser = QueryParser::for_index(&index, vec![text]);

        // Search for the non-ASCII spelling "straße".
        let searcher = index.reader().unwrap().searcher();
        let query = query_parser.parse_query("straße").unwrap();
        let top_docs = searcher.search(&query, &TopDocs::with_limit(10)).unwrap();

        assert_eq!(top_docs.len(), 2);

        // Every hit must be one of the two stored spellings.
        for (_score, doc_address) in &top_docs {
            let stored: TantivyDocument = searcher.doc(*doc_address).unwrap();
            let stored_text = stored.get_first(text).unwrap().as_str().unwrap();
            assert!(stored_text == "strasse" || stored_text == "straße");
        }
    }
}
5 changes: 4 additions & 1 deletion rust/src/tantivy_util/tokenizer.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use tantivy::{Index, TantivyError};
use tantivy::tokenizer::{LowerCaser, NgramTokenizer, RawTokenizer, RemoveLongFilter, SimpleTokenizer, TextAnalyzer};
use tantivy::tokenizer::{AsciiFoldingFilter, LowerCaser, NgramTokenizer, RawTokenizer, RemoveLongFilter, SimpleTokenizer, TextAnalyzer};
use crate::tantivy_util::{EdgeNgramTokenizer};
use crate::tantivy_util::stemmer::create_stemmer;

Expand All @@ -21,6 +21,7 @@ pub fn register_edge_ngram_tokenizer(
limit
))
.filter(LowerCaser)
.filter(AsciiFoldingFilter)
.build();

register_tokenizer(index, tokenizer_name, text_analyzer);
Expand All @@ -35,6 +36,7 @@ pub fn register_simple_tokenizer(
let text_analyzer = TextAnalyzer::builder(SimpleTokenizer::default())
.filter(RemoveLongFilter::limit(text_limit))
.filter(LowerCaser)
.filter(AsciiFoldingFilter)
.filter(create_stemmer(lang))
.build();

Expand Down Expand Up @@ -62,6 +64,7 @@ pub fn register_ngram_tokenizer(

let text_analyzer = TextAnalyzer::builder(tokenizer)
.filter(LowerCaser)
.filter(AsciiFoldingFilter)
.build();

register_tokenizer(index, tokenizer_name, text_analyzer);
Expand Down
47 changes: 46 additions & 1 deletion tantivy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ import (
"os"
"testing"

"github.com/anyproto/tantivy-go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/anyproto/tantivy-go"
)

const NameBody = "body"
Expand Down Expand Up @@ -199,6 +200,50 @@ func Test(t *testing.T) {
require.Equal(t, uint64(0), docs)
})

// Indexes one document containing accented characters and "ß", then checks
// that both the original spellings and their ASCII-folded forms find it.
t.Run("docs search - when ascii folding", func(t *testing.T) {
	_, index := fx(t, limit, 1, false)

	defer index.Free()

	doc, err := addDoc(t, "Idées fête", "mères straße", "1", index)
	require.NoError(t, err)

	err = index.AddAndConsumeDocuments(doc)
	require.NoError(t, err)

	docs, err := index.NumDocs()
	require.NoError(t, err)
	require.Equal(t, uint64(1), docs)

	// Title, original accented spelling.
	result, err := index.Search("Idées fête", 100, true, NameTitle)
	require.NoError(t, err)
	// Register the release as soon as the search is known to have succeeded,
	// so a failing assertion below cannot skip it.
	defer result.Free()

	size, err := result.GetSize()
	require.NoError(t, err)
	require.Equal(t, 1, int(size))

	// Title, ASCII-folded lower-case spelling.
	result2, err := index.Search("idees fete", 100, true, NameTitle)
	require.NoError(t, err)
	defer result2.Free()

	size2, err := result2.GetSize()
	require.NoError(t, err)
	require.Equal(t, 1, int(size2))

	// Body, original spelling with "ß".
	result3, err := index.Search("straße", 100, true, NameBody)
	require.NoError(t, err)
	defer result3.Free()

	size3, err := result3.GetSize()
	require.NoError(t, err)
	require.Equal(t, 1, int(size3))

	// Body, ASCII-folded spelling.
	result4, err := index.Search("strasse", 100, true, NameBody)
	require.NoError(t, err)
	defer result4.Free()

	size4, err := result4.GetSize()
	require.NoError(t, err)
	require.Equal(t, 1, int(size4))
})

t.Run("docs search and remove - when fast", func(t *testing.T) {
_, index := fx(t, limit, minGram, false)

Expand Down

0 comments on commit ac86fb1

Please sign in to comment.