Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Make soft deletion optional in document addition and deletion + add lots of tests #720

Merged
merged 3 commits into from
Dec 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
547 changes: 545 additions & 2 deletions milli/src/index.rs

Large diffs are not rendered by default.

93 changes: 91 additions & 2 deletions milli/src/search/facet/facet_sort_ascending.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
// that we found all the documents in the sub level iterations already,
// we can pop this level iterator.
if documents_ids.is_empty() {
// break our of the for loop into the end of the 'outer loop, which
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// break our of the for loop into the end of the 'outer loop, which
// break out of the for loop into the end of the 'outer loop, which

// pops the stack
break;
}

Expand Down Expand Up @@ -113,11 +115,14 @@ mod tests {

use crate::milli_snap;
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
use crate::search::facet::tests::{
get_random_looking_index, get_random_looking_string_index_with_multiple_field_ids,
get_simple_index, get_simple_string_index_with_multiple_field_ids,
};
use crate::snapshot_tests::display_bitmap;

#[test]
fn filter_sort() {
fn filter_sort_ascending() {
let indexes = [get_simple_index(), get_random_looking_index()];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
Expand All @@ -134,4 +139,88 @@ mod tests {
txn.commit().unwrap();
}
}

#[test]
fn filter_sort_ascending_multiple_field_ids() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
milli_snap!(results, format!("{i}-0"));

let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
milli_snap!(results, format!("{i}-1"));

txn.commit().unwrap();
}
}

#[test]
fn filter_sort_ascending_with_no_candidates() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for (_i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = RoaringBitmap::new();
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());

let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());

txn.commit().unwrap();
}
}

#[test]
fn filter_sort_ascending_with_inexisting_field_id() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for (_i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = RoaringBitmap::new();
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap();
for el in iter {
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());

txn.commit().unwrap();
}
}
}
97 changes: 95 additions & 2 deletions milli/src/search/facet/facet_sort_descending.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,20 @@ mod tests {
use crate::heed_codec::ByteSliceRefCodec;
use crate::milli_snap;
use crate::search::facet::facet_sort_descending::descending_facet_sort;
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
use crate::search::facet::tests::{
get_random_looking_index, get_random_looking_string_index_with_multiple_field_ids,
get_simple_index, get_simple_index_with_multiple_field_ids,
get_simple_string_index_with_multiple_field_ids,
};
use crate::snapshot_tests::display_bitmap;

#[test]
fn filter_sort_descending() {
let indexes = [get_simple_index(), get_random_looking_index()];
let indexes = [
get_simple_index(),
get_random_looking_index(),
get_simple_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
Expand All @@ -147,4 +155,89 @@ mod tests {
txn.commit().unwrap();
}
}

#[test]
fn filter_sort_descending_multiple_field_ids() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
let mut results = String::new();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap();
for el in iter {
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
milli_snap!(results, format!("{i}-0"));

let mut results = String::new();

let iter = descending_facet_sort(&txn, db, 1, candidates).unwrap();
for el in iter {
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
milli_snap!(results, format!("{i}-1"));

txn.commit().unwrap();
}
}
#[test]
fn filter_sort_ascending_with_no_candidates() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for (_i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = RoaringBitmap::new();
let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());

let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());

txn.commit().unwrap();
}
}

#[test]
fn filter_sort_ascending_with_inexisting_field_id() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for (_i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = RoaringBitmap::new();
let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap();
for el in iter {
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());

txn.commit().unwrap();
}
}
}
42 changes: 41 additions & 1 deletion milli/src/search/facet/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ pub(crate) mod tests {
use roaring::RoaringBitmap;

use crate::heed_codec::facet::OrderedF64Codec;
use crate::update::facet::tests::FacetIndex;
use crate::heed_codec::StrRefCodec;
use crate::update::facet::test_helpers::FacetIndex;

pub fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
Expand Down Expand Up @@ -147,4 +148,43 @@ pub(crate) mod tests {
txn.commit().unwrap();
index
}
pub fn get_simple_string_index_with_multiple_field_ids() -> FacetIndex<StrRefCodec> {
let index = FacetIndex::<StrRefCodec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
for fid in 0..2 {
for i in 0..256u16 {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(i as u32);
if i % 2 == 0 {
index.insert(&mut txn, fid, &format!("{i}").as_str(), &bitmap);
} else {
index.insert(&mut txn, fid, &"", &bitmap);
}
}
}
txn.commit().unwrap();
index
}
pub fn get_random_looking_string_index_with_multiple_field_ids() -> FacetIndex<StrRefCodec> {
let index = FacetIndex::<StrRefCodec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();

let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
let keys =
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
for fid in 0..2 {
for (_i, &key) in keys.iter().enumerate() {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(key);
bitmap.insert(key + 100);
if key % 2 == 0 {
index.insert(&mut txn, fid, &format!("{key}").as_str(), &bitmap);
} else {
index.insert(&mut txn, fid, &"", &bitmap);
}
}
}
txn.commit().unwrap();
index
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
---
source: milli/src/search/facet/facet_sort_ascending.rs
---
[201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, ]
[200, ]
[202, ]
[204, ]
[206, ]
[208, ]
[210, ]
[212, ]
[214, ]
[216, ]
[218, ]
[220, ]
[222, ]
[224, ]
[226, ]
[228, ]
[230, ]
[232, ]
[234, ]
[236, ]
[238, ]
[240, ]
[242, ]
[244, ]
[246, ]
[248, ]
[250, ]
[252, ]
[254, ]

Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
---
source: milli/src/search/facet/facet_sort_ascending.rs
---
[201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, ]
[200, ]
[202, ]
[204, ]
[206, ]
[208, ]
[210, ]
[212, ]
[214, ]
[216, ]
[218, ]
[220, ]
[222, ]
[224, ]
[226, ]
[228, ]
[230, ]
[232, ]
[234, ]
[236, ]
[238, ]
[240, ]
[242, ]
[244, ]
[246, ]
[248, ]
[250, ]
[252, ]
[254, ]

Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
---
source: milli/src/search/facet/facet_sort_ascending.rs
---
[201, 203, 205, 207, 209, 211, 215, 219, 223, 231, 233, 235, 237, 239, 241, 243, 247, 263, 267, 269, 273, 277, 279, 281, 289, 293, 295, 297, ]
[202, ]
[224, ]
[230, ]
[236, ]
[244, ]
[250, ]
[256, ]
[258, ]
[260, ]
[262, ]
[264, ]
[278, ]
[282, ]
[286, ]
[292, ]
[206, ]
[208, ]
[210, ]
[216, ]
[220, ]
[226, ]
[238, ]

Loading