Skip to content

Commit

Permalink
add week 2 day 5 + 6 tests
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Chi <iskyzh@gmail.com>
  • Loading branch information
skyzh committed Jan 26, 2024
1 parent 595016f commit 14c3be3
Show file tree
Hide file tree
Showing 12 changed files with 206 additions and 27 deletions.
12 changes: 0 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,18 +71,6 @@ We are working on chapter 3 and more test cases for all existing contents.
| 3.5 | Transactions and Optimistic Concurrency Control | | | |
| 3.6 | Serializable Snapshot Isolation | | | |
| 3.7 | TTL (Time-to-Live) Entries | | | |
| 4.1 | Benchmarking | | | |
| 4.2 | Block Compression | | | |
| 4.3 | Trivial Move and Parallel Compaction | | | |
| 4.4 | Alternative Block Encodings | | | |
| 4.5 | Rate Limiter and I/O Optimizations | | | |
| 4.6 | Build Your Own Block Cache | | | |
| 4.7 | Build Your Own SkipList | | | |
| 4.8 | Async Engine | | | |
| 4.9 | Key-Value Separation | | | |
| 4.10 | Column Families | | | |
| 4.11 | Sharding | | | |
| 4.12 | SQL over Mini-LSM | | | |

## License

Expand Down
2 changes: 1 addition & 1 deletion mini-lsm-book/src/week2-07-snacks.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<!-- ![Chapter Overview](./lsm-tutorial/week2-07-overview.svg) -->

at the end of each week, we will have some easy, not important, while interesting things
In the previous chapter, you already built a full LSM-based storage engine with. At the end of this week, we will implement some easy but important optimizations of the storage engine. Welcome to Mini-LSM's week w snack time!

In this chapter, you will:

Expand Down
9 changes: 5 additions & 4 deletions mini-lsm-book/src/week4-overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ This is an advanced part that deep dives into optimizations and applications of
| 4.7 | Build Your Own SkipList | | | |
| 4.8 | Async Engine | | | |
| 4.9 | IO-uring-based I/O engine | | | |
| 4.10 | Key-Value Separation | | | |
| 4.11 | Column Families | | | |
| 4.12 | Sharding | | | |
| 4.13 | SQL over Mini-LSM | | | |
| 4.10 | Prefetching | | | |
| 4.11 | Key-Value Separation | | | |
| 4.12 | Column Families | | | |
| 4.13 | Sharding | | | |
| 4.14 | SQL over Mini-LSM | | | |
2 changes: 1 addition & 1 deletion mini-lsm-mvcc/src/compact.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ impl LsmStorageInner {
let builder_inner = builder.as_mut().unwrap();
builder_inner.add(iter.key(), iter.value());

let same_as_last_key = iter.key().key_ref() == &last_key;
let same_as_last_key = iter.key().key_ref() == last_key;

if builder_inner.estimated_size() >= self.options.target_sst_size && !same_as_last_key {
let sst_id = self.next_sst_id();
Expand Down
1 change: 1 addition & 0 deletions mini-lsm-mvcc/src/lsm_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ impl LsmStorageState {
}
}

#[derive(Debug, Clone)]
pub struct LsmStorageOptions {
// Block size in bytes
pub block_size: usize,
Expand Down
1 change: 1 addition & 0 deletions mini-lsm-starter/src/lsm_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ impl LsmStorageState {
}
}

#[derive(Debug, Clone)]
pub struct LsmStorageOptions {
// Block size in bytes
pub block_size: usize,
Expand Down
19 changes: 16 additions & 3 deletions mini-lsm/src/compact.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ impl LsmStorageInner {
let CompactionOptions::NoCompaction = self.options.compaction_options else {
panic!("full compaction can only be called with compaction is not enabled")
};

let snapshot = {
let state = self.state.read();
state.clone()
Expand All @@ -264,23 +265,26 @@ impl LsmStorageInner {
l0_sstables: l0_sstables.clone(),
l1_sstables: l1_sstables.clone(),
};

println!("force full compaction: {:?}", compaction_task);

let sstables = self.compact(&compaction_task)?;
let mut ids = Vec::with_capacity(sstables.len());

{
let _state_lock = self.state_lock.lock();
let state_lock = self.state_lock.lock();
let mut state = self.state.read().as_ref().clone();
for sst in l0_sstables.iter().chain(l1_sstables.iter()) {
let result = state.sstables.remove(sst);
assert!(result.is_some());
}
let mut ids = Vec::with_capacity(sstables.len());
for new_sst in sstables {
ids.push(new_sst.sst_id());
let result = state.sstables.insert(new_sst.sst_id(), new_sst);
assert!(result.is_none());
}
assert_eq!(l1_sstables, state.levels[0].1);
state.levels[0].1 = ids;
state.levels[0].1 = ids.clone();
let mut l0_sstables_map = l0_sstables.iter().copied().collect::<HashSet<_>>();
state.l0_sstables = state
.l0_sstables
Expand All @@ -290,10 +294,18 @@ impl LsmStorageInner {
.collect::<Vec<_>>();
assert!(l0_sstables_map.is_empty());
*self.state.write() = Arc::new(state);
self.sync_dir()?;
self.manifest.as_ref().unwrap().add_record(
&state_lock,
ManifestRecord::Compaction(compaction_task, ids.clone()),
)?;
}
for sst in l0_sstables.iter().chain(l1_sstables.iter()) {
std::fs::remove_file(self.path_of_sst(*sst))?;
}

println!("force full compaction done, new SSTs: {:?}", ids);

Ok(())
}

Expand All @@ -308,6 +320,7 @@ impl LsmStorageInner {
let Some(task) = task else {
return Ok(());
};
self.dump_structure();
println!("running compaction task: {:?}", task);
let sstables = self.compact(&task)?;
let output = sstables.iter().map(|x| x.sst_id()).collect::<Vec<_>>();
Expand Down
3 changes: 2 additions & 1 deletion mini-lsm/src/lsm_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ impl LsmStorageState {
}
}

#[derive(Debug, Clone)]
pub struct LsmStorageOptions {
// Block size in bytes
pub block_size: usize,
Expand Down Expand Up @@ -363,7 +364,7 @@ impl LsmStorageInner {
table_id,
Some(block_cache.clone()),
FileObject::open(&Self::path_of_sst_static(path, table_id))
.context("failed to open SST")?,
.with_context(|| format!("failed to open SST: {}", table_id))?,
)?;
state.sstables.insert(table_id, Arc::new(sst));
sst_cnt += 1;
Expand Down
2 changes: 2 additions & 0 deletions mini-lsm/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ mod week2_day1;
mod week2_day2;
mod week2_day3;
mod week2_day4;
mod week2_day5;
mod week2_day6;
24 changes: 19 additions & 5 deletions mini-lsm/src/tests/harness.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{collections::BTreeMap, path::Path, sync::Arc, time::Duration};
use std::{collections::BTreeMap, ops::Bound, path::Path, sync::Arc, time::Duration};

use anyhow::{bail, Result};
use bytes::Bytes;
Expand All @@ -9,7 +9,7 @@ use crate::{
TieredCompactionOptions,
},
iterators::StorageIterator,
key::KeySlice,
key::{KeySlice, TS_ENABLED},
lsm_storage::{BlockCache, LsmStorageInner, MiniLsm},
table::{SsTable, SsTableBuilder},
};
Expand Down Expand Up @@ -171,11 +171,12 @@ pub fn compaction_bench(storage: Arc<MiniLsm>) {
let gen_key = |i| format!("{:010}", i); // 10B
let gen_value = |i| format!("{:0110}", i); // 110B
let mut max_key = 0;
let overlaps = if TS_ENABLED { 10000 } else { 20000 };
for iter in 0..10 {
let range_begin = iter * 5000;
for i in range_begin..(range_begin + 10000) {
for i in range_begin..(range_begin + overlaps) {
// 120B per key, 4MB data populated
let key = gen_key(i);
let key: String = gen_key(i);
let version = key_map.get(&i).copied().unwrap_or_default() + 1;
let value = gen_value(version);
key_map.insert(i, version);
Expand All @@ -184,17 +185,24 @@ pub fn compaction_bench(storage: Arc<MiniLsm>) {
}
}

let mut expected_key_value_pairs = Vec::new();
for i in 0..(max_key + 40000) {
let key = gen_key(i);
let value = storage.get(key.as_bytes()).unwrap();
if let Some(val) = key_map.get(&i) {
let expected_value = gen_value(*val);
assert_eq!(value, Some(Bytes::from(expected_value)));
assert_eq!(value, Some(Bytes::from(expected_value.clone())));
expected_key_value_pairs.push((Bytes::from(key), Bytes::from(expected_value)));
} else {
assert!(value.is_none());
}
}

check_lsm_iter_result_by_key(
&mut storage.scan(Bound::Unbounded, Bound::Unbounded).unwrap(),
expected_key_value_pairs,
);

while {
let snapshot = storage.inner.state.read();
!snapshot.imm_memtables.is_empty()
Expand Down Expand Up @@ -324,3 +332,9 @@ pub fn check_compaction_ratio(storage: Arc<MiniLsm>) {
}
}
}

pub fn dump_files_in_dir(path: impl AsRef<Path>) {
for f in path.as_ref().read_dir().unwrap() {
println!("{}", f.unwrap().path().display())
}
}
81 changes: 81 additions & 0 deletions mini-lsm/src/tests/week2_day5.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
use tempfile::tempdir;

use crate::{
compact::{
CompactionOptions, LeveledCompactionOptions, SimpleLeveledCompactionOptions,
TieredCompactionOptions,
},
lsm_storage::{LsmStorageOptions, MiniLsm},
tests::harness::dump_files_in_dir,
};

#[test]
fn test_integration_leveled() {
test_integration(CompactionOptions::Leveled(LeveledCompactionOptions {
level_size_multiplier: 2,
level0_file_num_compaction_trigger: 2,
max_levels: 3,
base_level_size_mb: 1,
}))
}

#[test]
fn test_integration_tiered() {
test_integration(CompactionOptions::Tiered(TieredCompactionOptions {
num_tiers: 3,
max_size_amplification_percent: 200,
size_ratio: 1,
min_merge_width: 3,
}))
}

#[test]
fn test_integration_simple() {
test_integration(CompactionOptions::Simple(SimpleLeveledCompactionOptions {
size_ratio_percent: 200,
level0_file_num_compaction_trigger: 2,
max_levels: 3,
}));
}

fn test_integration(compaction_options: CompactionOptions) {
let dir = tempdir().unwrap();
let storage = MiniLsm::open(
&dir,
LsmStorageOptions::default_for_week2_test(compaction_options.clone()),
)
.unwrap();
for i in 0..=20 {
storage.put(b"0", format!("v{}", i).as_bytes()).unwrap();
if i % 2 == 0 {
storage.put(b"1", format!("v{}", i).as_bytes()).unwrap();
} else {
storage.delete(b"1").unwrap();
}
if i % 2 == 1 {
storage.put(b"2", format!("v{}", i).as_bytes()).unwrap();
} else {
storage.delete(b"2").unwrap();
}
storage
.inner
.force_freeze_memtable(&storage.inner.state_lock.lock())
.unwrap();
}
storage.close().unwrap();
// ensure all SSTs are flushed
assert!(storage.inner.state.read().memtable.is_empty());
assert!(storage.inner.state.read().imm_memtables.is_empty());
storage.dump_structure();
drop(storage);
dump_files_in_dir(&dir);

let storage = MiniLsm::open(
&dir,
LsmStorageOptions::default_for_week2_test(compaction_options.clone()),
)
.unwrap();
assert_eq!(&storage.get(b"0").unwrap().unwrap()[..], b"v20".as_slice());
assert_eq!(&storage.get(b"1").unwrap().unwrap()[..], b"v20".as_slice());
assert_eq!(storage.get(b"2").unwrap(), None);
}
77 changes: 77 additions & 0 deletions mini-lsm/src/tests/week2_day6.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
use tempfile::tempdir;

use crate::{
compact::{
CompactionOptions, LeveledCompactionOptions, SimpleLeveledCompactionOptions,
TieredCompactionOptions,
},
lsm_storage::{LsmStorageOptions, MiniLsm},
tests::harness::dump_files_in_dir,
};

#[test]
fn test_integration_leveled() {
test_integration(CompactionOptions::Leveled(LeveledCompactionOptions {
level_size_multiplier: 2,
level0_file_num_compaction_trigger: 2,
max_levels: 3,
base_level_size_mb: 1,
}))
}

#[test]
fn test_integration_tiered() {
test_integration(CompactionOptions::Tiered(TieredCompactionOptions {
num_tiers: 3,
max_size_amplification_percent: 200,
size_ratio: 1,
min_merge_width: 3,
}))
}

#[test]
fn test_integration_simple() {
test_integration(CompactionOptions::Simple(SimpleLeveledCompactionOptions {
size_ratio_percent: 200,
level0_file_num_compaction_trigger: 2,
max_levels: 3,
}));
}

fn test_integration(compaction_options: CompactionOptions) {
let dir = tempdir().unwrap();
let mut options = LsmStorageOptions::default_for_week2_test(compaction_options);
options.enable_wal = true;
let storage = MiniLsm::open(&dir, options.clone()).unwrap();
for i in 0..=20 {
storage.put(b"0", format!("v{}", i).as_bytes()).unwrap();
if i % 2 == 0 {
storage.put(b"1", format!("v{}", i).as_bytes()).unwrap();
} else {
storage.delete(b"1").unwrap();
}
if i % 2 == 1 {
storage.put(b"2", format!("v{}", i).as_bytes()).unwrap();
} else {
storage.delete(b"2").unwrap();
}
storage
.inner
.force_freeze_memtable(&storage.inner.state_lock.lock())
.unwrap();
}
storage.close().unwrap();
// ensure some SSTs are not flushed
assert!(
!storage.inner.state.read().memtable.is_empty()
|| !storage.inner.state.read().imm_memtables.is_empty()
);
storage.dump_structure();
drop(storage);
dump_files_in_dir(&dir);

let storage = MiniLsm::open(&dir, options).unwrap();
assert_eq!(&storage.get(b"0").unwrap().unwrap()[..], b"v20".as_slice());
assert_eq!(&storage.get(b"1").unwrap().unwrap()[..], b"v20".as_slice());
assert_eq!(storage.get(b"2").unwrap(), None);
}

0 comments on commit 14c3be3

Please sign in to comment.