Skip to content

Commit

Permalink
feat: trie-iteration-benchmark - full and shallow iteration (#9114)
Browse files Browse the repository at this point in the history
This is step 1 of the following plan to benchmark how long it takes to perform shallow trie iteration. For more details about the goal, please see #9101.

1. Add a neard subcommand that does full trie iteration. 
2. Extend it to actually measure and report the time it takes.
3. Extend it with shallow iteration. 

Once all three steps are done it will be possible to benchmark the full iteration, the shallow iteration and compare the time it takes for each.
  • Loading branch information
wacban authored Jun 6, 2023
1 parent 8a7054f commit ee67af2
Show file tree
Hide file tree
Showing 5 changed files with 508 additions and 26 deletions.
13 changes: 13 additions & 0 deletions core/primitives/src/state_record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,19 @@ impl StateRecord {
_ => unreachable!(),
}
}

/// Returns the name of this record's variant (for example `"Account"`)
/// as an owned `String`.
pub fn get_type_string(&self) -> String {
    let variant_name: &'static str = match self {
        StateRecord::Account { .. } => "Account",
        StateRecord::Data { .. } => "Data",
        StateRecord::Contract { .. } => "Contract",
        StateRecord::AccessKey { .. } => "AccessKey",
        StateRecord::PostponedReceipt { .. } => "PostponedReceipt",
        StateRecord::ReceivedData { .. } => "ReceivedData",
        StateRecord::DelayedReceipt { .. } => "DelayedReceipt",
    };
    String::from(variant_name)
}
}

impl Display for StateRecord {
Expand Down
4 changes: 2 additions & 2 deletions core/primitives/src/trie_key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::types::AccountId;
pub(crate) const ACCOUNT_DATA_SEPARATOR: u8 = b',';

/// Type identifiers used for DB key generation to store values in the key-value storage.
pub(crate) mod col {
pub mod col {
/// This column id is used when storing `primitives::account::Account` type about a given
/// `account_id`.
pub const ACCOUNT: u8 = 0;
Expand Down Expand Up @@ -375,7 +375,7 @@ pub mod trie_key_parsers {
Ok(None)
}

fn parse_account_id_from_trie_key_with_separator(
pub fn parse_account_id_from_trie_key_with_separator(
col: u8,
raw_key: &[u8],
col_name: &str,
Expand Down
191 changes: 169 additions & 22 deletions core/store/src/trie/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ impl Crumb {
/// There are two stacks that we track while iterating: the trail and the key_nibbles.
/// The trail is a vector of trie nodes on the path from root node to the node that is
/// currently being processed together with processing status - the Crumb.
/// The key_nibbles is a vector of nibbles from the state root not to the node that is
/// The key_nibbles is a vector of nibbles from the state root node to the node that is
/// currently being processed.
/// The trail and the key_nibbles may have different lengths e.g. an extension trie node
/// will add only a single item to the trail but may add multiple nibbles to the key_nibbles.
Expand All @@ -57,8 +57,17 @@ pub struct TrieIterator<'a> {
/// If not `None`, a list of all nodes that the iterator has visited.
visited_nodes: Option<Vec<std::sync::Arc<[u8]>>>,

/// Max depth of iteration.
max_depth: Option<usize>,
/// Prune condition is an optional closure that given the key nibbles
/// decides if the given trie node should be pruned.
///
/// If the prune condition returns true for a given node, this node and the
/// whole sub-tree rooted at this node will be pruned and skipped in iteration.
///
/// Please note that since the iterator supports seeking the prune condition
/// should have the property that if a prefix of a key should be pruned then
/// the key also should be pruned. Otherwise it would be possible to bypass
/// the pruning by seeking inside of the pruned sub-tree.
prune_condition: Option<Box<dyn Fn(&Vec<u8>) -> bool>>,
}

/// The TrieItem is a tuple of (key, value) of the node.
Expand All @@ -76,19 +85,22 @@ pub struct TrieTraversalItem {
impl<'a> TrieIterator<'a> {
#![allow(clippy::new_ret_no_self)]
/// Create a new iterator.
pub(super) fn new(trie: &'a Trie, max_depth: Option<usize>) -> Result<Self, StorageError> {
pub(super) fn new(
trie: &'a Trie,
prune_condition: Option<Box<dyn Fn(&Vec<u8>) -> bool>>,
) -> Result<Self, StorageError> {
let mut r = TrieIterator {
trie,
trail: Vec::with_capacity(8),
key_nibbles: Vec::with_capacity(64),
visited_nodes: None,
max_depth,
prune_condition,
};
r.descend_into_node(&trie.root)?;
Ok(r)
}

/// Position the iterator on the first element with key => `key`.
/// Position the iterator on the first element with key >= `key`.
pub fn seek_prefix<K: AsRef<[u8]>>(&mut self, key: K) -> Result<(), StorageError> {
    let target = NibbleSlice::new(key.as_ref());
    // Only success or failure of the seek is reported to the caller; whatever
    // `seek_nibble_slice` returns on success is discarded.
    self.seek_nibble_slice(target, true)?;
    Ok(())
}
Expand Down Expand Up @@ -387,8 +399,8 @@ impl<'a> Iterator for TrieIterator<'a> {
loop {
let iter_step = self.iter_step()?;

let can_process = match self.max_depth {
Some(max_depth) => self.key_nibbles.len() <= max_depth,
let can_process = match &self.prune_condition {
Some(prune_condition) => !prune_condition(&self.key_nibbles),
None => true,
};

Expand Down Expand Up @@ -420,6 +432,7 @@ impl<'a> Iterator for TrieIterator<'a> {
mod tests {
use std::collections::BTreeMap;

use itertools::Itertools;
use rand::seq::SliceRandom;
use rand::Rng;

Expand All @@ -431,6 +444,10 @@ mod tests {
use crate::Trie;
use near_primitives::shard_layout::ShardUId;

/// Dummy value (a single zero byte) used by tests that only care about keys.
fn value() -> Option<Vec<u8>> {
    let payload: Vec<u8> = vec![0];
    Some(payload)
}

/// Checks that for visiting interval of trie nodes first state key is
/// included and the last one is excluded.
#[test]
Expand All @@ -452,20 +469,7 @@ mod tests {
fn test_iterator() {
let mut rng = rand::thread_rng();
for _ in 0..100 {
let tries = create_tries_complex(1, 2);
let shard_uid = ShardUId { version: 1, shard_id: 0 };
let trie_changes = gen_changes(&mut rng, 10);
let trie_changes = simplify_changes(&trie_changes);

let mut map = BTreeMap::new();
for (key, value) in trie_changes.iter() {
if let Some(value) = value {
map.insert(key.clone(), value.clone());
}
}
let state_root =
test_populate_trie(&tries, &Trie::EMPTY_ROOT, shard_uid, trie_changes.clone());
let trie = tries.get_trie_for_shard(shard_uid, state_root);
let (trie_changes, map, trie) = gen_random_trie(&mut rng);

{
let result1: Vec<_> = trie.iter().unwrap().map(Result::unwrap).collect();
Expand Down Expand Up @@ -500,6 +504,149 @@ mod tests {
}
}

/// For every key in the generated trie changes, prunes everything under that
/// key and checks the iterator against a filtered copy of the reference map.
#[test]
fn test_iterator_with_prune_condition_base() {
    let mut rng = rand::thread_rng();
    for _ in 0..100 {
        let (trie_changes, map, trie) = gen_random_trie(&mut rng);

        // Check that pruning just one key (and its subtree) works as expected.
        for (pruned_key, _) in trie_changes.iter() {
            let pruned_nibbles = NibbleSlice::new(pruned_key.as_slice()).iter().collect_vec();
            let is_pruned = move |nibbles: &Vec<u8>| nibbles.starts_with(&pruned_nibbles);

            // Reference result: every map entry whose key is not pruned.
            let expected = map
                .iter()
                .filter(|(key, _)| {
                    let nibbles = NibbleSlice::new(key).iter().collect_vec();
                    !is_pruned(&nibbles)
                })
                .map(|(key, value)| (key.clone(), value.clone()))
                .collect_vec();

            // The closure is not needed afterwards, so it is moved into the box.
            let actual = trie
                .iter_with_prune_condition(Some(Box::new(is_pruned)))
                .unwrap()
                .map(Result::unwrap)
                .collect_vec();

            assert_eq!(actual, expected);
        }
    }
}

// Check that pruning a node doesn't descend into its subtree.
// A buggy pruning implementation could still iterate over all the
// nodes but simply not return them. This test makes sure this is
// not the case.
#[test]
fn test_iterator_with_prune_condition_subtree() {
    let mut rng = rand::thread_rng();
    for _ in 0..100 {
        let (trie_changes, map, trie) = gen_random_trie(&mut rng);

        // Test pruning by every key listed in the generated trie changes.
        for (pruned_key, _) in trie_changes.iter() {
            let pruned_nibbles = NibbleSlice::new(pruned_key.as_slice()).iter().collect_vec();

            // Expected result, computed the way a proper prune condition
            // works: the pruned key and everything below it is dropped.
            let expected = map
                .iter()
                .filter(|(key, _)| {
                    let nibbles = NibbleSlice::new(key).iter().collect_vec();
                    !nibbles.starts_with(&pruned_nibbles)
                })
                .map(|(key, value)| (key.clone(), value.clone()))
                .collect_vec();

            // This prune condition is not valid in the sense that it only
            // prunes a single node but not its subtree. This is intentional:
            // the results can only match if the iterator never descends into
            // the subtree of the pruned node.
            let exact_match_only = move |nibbles: &Vec<u8>| nibbles == &pruned_nibbles;

            let actual = trie
                .iter_with_prune_condition(Some(Box::new(exact_match_only)))
                .unwrap()
                .map(Result::unwrap)
                .collect_vec();

            assert_eq!(actual, expected);
        }
    }
}

// Utility function for testing trie iteration with the max depth prune condition set.
// * `keys` is a list of keys to be inserted into the trie
// * `pruned_keys` is the expected list of keys that should be the result of iteration
// * `max_depth` is the maximum depth, expressed in nibbles, passed to `iter_with_max_depth`
//
// The key lists are taken as slices so that callers can pass a `&Vec<_>`, an
// array or any other slice without building a `Vec` first.
fn test_prune_max_depth_impl(
    keys: &[Vec<u8>],
    pruned_keys: &[Vec<u8>],
    max_depth: usize,
) {
    let shard_uid = ShardUId::single_shard();
    let tries = create_tries();
    // Every key is inserted with the same dummy value; only the keys matter here.
    let trie_changes = keys.iter().map(|key| (key.clone(), value())).collect();
    let state_root = test_populate_trie(&tries, &Trie::EMPTY_ROOT, shard_uid, trie_changes);
    let trie = tries.get_trie_for_shard(shard_uid, state_root);
    let iter = trie.iter_with_max_depth(max_depth).unwrap();
    let iterated_keys: Vec<_> = iter.map(|item| item.unwrap().0).collect();

    assert_eq!(iterated_keys.as_slice(), pruned_keys);
}

/// Checks `iter_with_max_depth` on two hand-built tries: one with a shared
/// extension and one made of a chain of nested keys.
#[test]
fn test_prune_max_depth() {
    // simple trie with an extension
    //      extension(11111)
    //        branch(5, 6)
    //     leaf(5)    leaf(6)
    let extension_keys: Vec<Vec<u8>> = vec![vec![0x11, 0x11, 0x15], vec![0x11, 0x11, 0x16]];
    // max_depth is expressed in nibbles
    // both leaf nodes are at depth 6 (11 11 15) and (11 11 16)
    let extension_cases: Vec<(usize, Vec<Vec<u8>>)> = vec![
        // pruning by max depth 5 should return an empty result
        (5, vec![]),
        // pruning by max depth 6 should return both leaves
        (6, extension_keys.clone()),
    ];
    for (max_depth, expected) in &extension_cases {
        test_prune_max_depth_impl(&extension_keys, expected, *max_depth);
    }

    // long chain of branches: keys made of one to five 0x11 bytes
    let chain_keys: Vec<Vec<u8>> = (1..=5).map(|len| vec![0x11u8; len]).collect();
    let chain_cases: Vec<(usize, Vec<Vec<u8>>)> = vec![
        (1, vec![]),
        (2, vec![vec![0x11]]),
        (3, vec![vec![0x11]]),
        (4, vec![vec![0x11], vec![0x11, 0x11]]),
        (5, vec![vec![0x11], vec![0x11, 0x11]]),
    ];
    for (max_depth, expected) in &chain_cases {
        test_prune_max_depth_impl(&chain_keys, expected, *max_depth);
    }
}

/// Builds a trie from a randomly generated, simplified list of changes.
///
/// Returns a tuple of:
/// * the list of changes that was applied to an empty trie,
/// * a map holding the key-value pairs of all changes that carry a value,
/// * the resulting trie.
fn gen_random_trie(
    rng: &mut rand::rngs::ThreadRng,
) -> (Vec<(Vec<u8>, Option<Vec<u8>>)>, BTreeMap<Vec<u8>, Vec<u8>>, Trie) {
    let tries = create_tries_complex(1, 2);
    let shard_uid = ShardUId { version: 1, shard_id: 0 };
    let raw_changes = gen_changes(rng, 10);
    let trie_changes = simplify_changes(&raw_changes);

    // Changes without a value are left out of the reference map.
    let mut map = BTreeMap::new();
    map.extend(
        trie_changes
            .iter()
            .filter_map(|(key, value)| value.as_ref().map(|value| (key.clone(), value.clone()))),
    );

    let state_root =
        test_populate_trie(&tries, &Trie::EMPTY_ROOT, shard_uid, trie_changes.clone());
    let trie = tries.get_trie_for_shard(shard_uid, state_root);
    (trie_changes, map, trie)
}

fn test_get_trie_items(
trie: &Trie,
map: &BTreeMap<Vec<u8>, Vec<u8>>,
Expand Down
12 changes: 11 additions & 1 deletion core/store/src/trie/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -879,7 +879,17 @@ impl Trie {
&'a self,
max_depth: usize,
) -> Result<TrieIterator<'a>, StorageError> {
TrieIterator::new(self, Some(max_depth))
TrieIterator::new(
self,
Some(Box::new(move |key_nibbles: &Vec<u8>| key_nibbles.len() > max_depth)),
)
}

/// Creates an iterator over this trie that uses the given prune condition.
///
/// The condition is handed to `TrieIterator::new` unchanged. When it returns
/// true for the key nibbles of a node, that node and the whole sub-tree
/// rooted at it are skipped in iteration (see the `prune_condition` field of
/// `TrieIterator`). Passing `None` sets no prune condition.
pub fn iter_with_prune_condition<'a>(
&'a self,
prune_condition: Option<Box<dyn Fn(&Vec<u8>) -> bool>>,
) -> Result<TrieIterator<'a>, StorageError> {
TrieIterator::new(self, prune_condition)
}

pub fn get_trie_nodes_count(&self) -> TrieNodesCount {
Expand Down
Loading

0 comments on commit ee67af2

Please sign in to comment.