This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

db: Add delayed blocks pruning #12497

Closed
wants to merge 28 commits into from
Changes from 6 commits
Commits
28 commits
6cfc4e9
client/db: Prune state/blocks with delay
lexnv Oct 14, 2022
69cb590
tests: Delay prune blocks on finalization
lexnv Oct 14, 2022
d18d071
tests: Delay prune blocks with fork
lexnv Oct 14, 2022
a5fb1cc
client/cli: Add delayed pruning mode and make it default
lexnv Oct 14, 2022
4f2f54e
client/db: Announce proper leaves for delayed pruning
lexnv Oct 14, 2022
490c0c9
tests: Verify `displaced_leaves_after_finalizing` with delayed pruning
lexnv Oct 14, 2022
17a1bb9
client/db: Rename delayed_pruning to delay_canonicalization
lexnv Oct 18, 2022
835abe9
client/cli: Fix rustdoc
lexnv Oct 18, 2022
80a78dc
client/db: Handle canonization gap
lexnv Oct 28, 2022
52b0430
Revert "client/cli: Fix rustdoc"
lexnv Oct 28, 2022
2fb4030
Revert "client/cli: Add delayed pruning mode and make it default"
lexnv Oct 28, 2022
ec24de5
client/cli: Add `delayed_canonicalization` flag
lexnv Nov 3, 2022
5bfc4fa
client/service: Fix tests
lexnv Nov 3, 2022
fdee4cc
client/db: Move `delayed` param on the database settings
lexnv Nov 3, 2022
026181b
client/db: Add debug log for pruning and fix canon gap
lexnv Nov 3, 2022
f34dad5
client/db: Apply cargo fmt
lexnv Nov 3, 2022
2c28ac1
client/cli: Improve documentation
lexnv Nov 3, 2022
9f68abd
client/db: Simplify canonicalization on startup
lexnv Nov 3, 2022
c4ae9ec
Merge remote-tracking branch 'origin/master' into lexnv/delayed_pruning
lexnv Nov 3, 2022
5f1c535
client/db: Adjust testing for origin/master
lexnv Nov 3, 2022
118e943
client/db: Remove cloning for block numbers
lexnv Nov 3, 2022
8fabc52
client/db: Use `delayed_canonicalization` naming
lexnv Nov 7, 2022
15cb39d
client/db: Obtain last canonicalized and finalized from meta
lexnv Nov 7, 2022
2014094
client/state-db: Remove private `LAST_CANONICAL` constant
lexnv Nov 7, 2022
fac193f
Merge remote-tracking branch 'origin/master' into lexnv/delayed_pruning
lexnv Nov 7, 2022
6c9baa0
Merge remote-tracking branch 'origin/master' into lexnv/delayed_pruning
lexnv Nov 8, 2022
5603f52
client/db: Adjust testing to origin/master
lexnv Nov 8, 2022
4b6a06e
Merge remote-tracking branch 'origin/master' into lexnv/delayed_pruning
lexnv Nov 8, 2022
14 changes: 13 additions & 1 deletion client/cli/src/config.rs
@@ -42,6 +42,18 @@ pub(crate) const NODE_NAME_MAX_LENGTH: usize = 64;
/// Default sub directory to store network config.
pub(crate) const DEFAULT_NETWORK_CONFIG_PATH: &str = "network";

/// Delay the pruning of blocks by a given number of finalizations.
///
/// This value should be set to a sensible amount to accommodate the RPC
/// Spec V2 requirements of block pinning.
///
/// The user derives no benefits from controlling this, as the RPC API
/// should be uniform across the nodes.
///
/// This ensures that users have roughly 32 * 6 seconds (block finalization)
/// ~ 3 minutes to fetch the details of blocks.
pub(crate) const DELAYED_PRUNING: u32 = 32;
lexnv marked this conversation as resolved.

/// The recommended open file descriptor limit to be configured for the process.
const RECOMMENDED_OPEN_FILE_DESCRIPTOR_LIMIT: u64 = 10_000;

@@ -255,7 +267,7 @@ pub trait CliConfiguration<DCV: DefaultConfigurationValues = ()>: Sized {
fn blocks_pruning(&self) -> Result<BlocksPruning> {
self.pruning_params()
.map(|x| x.blocks_pruning())
.unwrap_or_else(|| Ok(BlocksPruning::KeepFinalized))
.unwrap_or_else(|| Ok(BlocksPruning::Delayed(DELAYED_PRUNING)))
}

/// Get the chain ID (string).
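As a quick sanity check on the sizing above, here is a self-contained sketch (not part of the diff) reproducing the back-of-the-envelope arithmetic behind the default; the 6-second finalization interval is the assumption stated in the doc comment, not something the node enforces.

// Sketch only: reproduces the sizing rationale for `DELAYED_PRUNING` above.
// `FINALIZATION_INTERVAL_SECS` is an assumed average, not a real constant.
const DELAYED_PRUNING: u32 = 32;
const FINALIZATION_INTERVAL_SECS: u32 = 6;

fn main() {
    // 32 finalizations at ~6 seconds each give roughly a 3-minute window in
    // which a just-finalized block can still be fetched before it is pruned.
    let pin_window_secs = DELAYED_PRUNING * FINALIZATION_INTERVAL_SECS;
    assert_eq!(pin_window_secs, 192);
    println!("~{} seconds to fetch a block after finalization", pin_window_secs);
}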
15 changes: 10 additions & 5 deletions client/cli/src/params/pruning_params.rs
@@ -30,12 +30,16 @@ pub struct PruningParams {
/// or for all of the canonical blocks (i.e 'archive-canonical').
#[clap(alias = "pruning", long, value_name = "PRUNING_MODE")]
pub state_pruning: Option<String>,
Member

I think there needs to be a separate option for this. It does not really control the pruning history length, but rather how soon unfinalized branches are discarded. @bkchr wdyt?

Contributor Author

I think we could make that work. Do you mean to remove `BlocksPruning::Delayed` and add it to the `PruningMode` from state-db?

We could then use `if let PruningMode::Delayed(delayed) = self.storage.state_db.pruning_mode()` in the db to determine the delay value with which to call into `state_db.canonicalize_block` and to prune blocks.

That would also mean we'd have to add a new pruning id here:

const PRUNING_MODE_DELAYED: &[u8] = b"delayed";
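For illustration, a minimal self-contained sketch of that alternative, assuming a hypothetical `Delayed` variant on the state-db `PruningMode` (the variant, its fields, and the helper below are placeholders, not the actual `sc-state-db` API):

// Hypothetical sketch of the alternative discussed above: the delay lives in
// the state-db pruning mode rather than in `BlocksPruning`. Local stand-in
// types only; this is not the actual `sc-state-db` API.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq)]
enum PruningMode {
    ArchiveAll,
    ArchiveCanonical,
    /// Keep the last `max_blocks` states.
    Blocks(u32),
    /// Like `Blocks`, but canonicalize and prune `delay` finalizations late.
    Delayed { max_blocks: u32, delay: u32 },
}

/// How the client-db backend could recover the delay from the pruning mode.
fn pruning_delay(mode: PruningMode) -> Option<u32> {
    if let PruningMode::Delayed { delay, .. } = mode {
        Some(delay)
    } else {
        None
    }
}

fn main() {
    assert_eq!(pruning_delay(PruningMode::Delayed { max_blocks: 256, delay: 32 }), Some(32));
    assert_eq!(pruning_delay(PruningMode::ArchiveCanonical), None);
    assert_eq!(pruning_delay(PruningMode::Blocks(256)), None);
}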

/// Specify the blocks pruning mode, a number of blocks to keep or 'archive'.
/// Specify the blocks pruning mode.
///
/// Default is to keep all finalized blocks.
/// otherwise, all blocks can be kept (i.e 'archive'),
/// or for all canonical blocks (i.e 'archive-canonical'),
/// or for the last N blocks (i.e a number).
/// The options are as follows:
/// 'delayed' Pruning of blocks is delayed for a sensible amount of time to
/// satisfy the RPC Spec V2.
/// 'archive' Keep all blocks.
/// 'archive-canonical' Keep all finalized (canonical) blocks.
/// [number] Keep the last N finalized (canonical) blocks.
///
/// Default is the 'delayed' option.
///
/// NOTE: only finalized blocks are subject for removal!
#[clap(alias = "keep-blocks", long, value_name = "COUNT")]
@@ -66,6 +70,7 @@ impl PruningParams {
Some(bp) => match bp.as_str() {
"archive" => Ok(BlocksPruning::KeepAll),
"archive-canonical" => Ok(BlocksPruning::KeepFinalized),
"delayed" => Ok(BlocksPruning::Delayed(crate::DELAYED_PRUNING)),
bc => bc
.parse()
.map_err(|_| {
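For readability, a condensed stand-alone sketch of the mapping the parser above implements; the local `BlocksPruning` copy and the `parse_blocks_pruning` helper are illustrative only, assuming the same `DELAYED_PRUNING` default of 32.

// Illustrative only: mirrors the accepted `--blocks-pruning` values in one
// place with local stand-in types; not the actual CLI code.
#[derive(Debug, PartialEq)]
enum BlocksPruning {
    KeepAll,       // "archive"
    KeepFinalized, // "archive-canonical"
    Some(u32),     // "<N>": keep the last N finalized blocks
    Delayed(u32),  // "delayed": prune N finalizations late (the default)
}

const DELAYED_PRUNING: u32 = 32;

fn parse_blocks_pruning(arg: Option<&str>) -> Result<BlocksPruning, String> {
    match arg {
        None | Some("delayed") => Ok(BlocksPruning::Delayed(DELAYED_PRUNING)),
        Some("archive") => Ok(BlocksPruning::KeepAll),
        Some("archive-canonical") => Ok(BlocksPruning::KeepFinalized),
        Some(n) => n
            .parse()
            .map(BlocksPruning::Some)
            .map_err(|_| format!("Invalid blocks pruning mode: {}", n)),
    }
}

fn main() {
    assert_eq!(parse_blocks_pruning(None), Ok(BlocksPruning::Delayed(32)));
    assert_eq!(parse_blocks_pruning(Some("256")), Ok(BlocksPruning::Some(256)));
    assert_eq!(parse_blocks_pruning(Some("archive")), Ok(BlocksPruning::KeepAll));
    assert!(parse_blocks_pruning(Some("bogus")).is_err());
}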
220 changes: 213 additions & 7 deletions client/db/src/lib.rs
@@ -328,6 +328,14 @@ pub enum BlocksPruning {
KeepFinalized,
/// Keep N recent finalized blocks.
Some(u32),
/// Delay the pruning of blocks by a given number of finalizations.
///
/// The blocks that were supposed to get pruned at the finalization N
/// will get pruned at N + delay.
///
/// The feature is introduced to satisfy the block pinning required
/// by the RPC spec V2.
Delayed(u32),
}

/// Where to find the database..
@@ -481,10 +489,11 @@ pub struct BlockchainDb<Block: BlockT> {
leaves: RwLock<LeafSet<Block::Hash, NumberFor<Block>>>,
header_metadata_cache: Arc<HeaderMetadataCache<Block>>,
header_cache: Mutex<LinkedHashMap<Block::Hash, Option<Block::Header>>>,
delayed_pruning: Option<u32>,
lexnv marked this conversation as resolved.
}

impl<Block: BlockT> BlockchainDb<Block> {
fn new(db: Arc<dyn Database<DbHash>>) -> ClientResult<Self> {
fn new(db: Arc<dyn Database<DbHash>>, delayed_pruning: Option<u32>) -> ClientResult<Self> {
let meta = read_meta::<Block>(&*db, columns::HEADER)?;
let leaves = LeafSet::read_from_db(&*db, columns::META, meta_keys::LEAF_PREFIX)?;
Ok(BlockchainDb {
@@ -493,6 +502,7 @@ impl<Block: BlockT> BlockchainDb<Block> {
meta: Arc::new(RwLock::new(meta)),
header_metadata_cache: Arc::new(HeaderMetadataCache::default()),
header_cache: Default::default(),
delayed_pruning,
})
}

@@ -659,8 +669,17 @@ impl<Block: BlockT> sc_client_api::blockchain::Backend<Block> for BlockchainDb<B

fn displaced_leaves_after_finalizing(
&self,
block_number: NumberFor<Block>,
mut block_number: NumberFor<Block>,
) -> ClientResult<Vec<Block::Hash>> {
if let Some(delayed) = self.delayed_pruning {
// No displaced leaves
if block_number < delayed.into() {
return Ok(Default::default())
}

block_number = block_number.saturating_sub(delayed.into());
}

Ok(self
.leaves
.read()
@@ -1077,7 +1096,7 @@ impl<Block: BlockT> Backend<Block> {
let state_pruning = match blocks_pruning {
BlocksPruning::KeepAll => PruningMode::ArchiveAll,
BlocksPruning::KeepFinalized => PruningMode::ArchiveCanonical,
BlocksPruning::Some(n) => PruningMode::blocks_pruning(n),
BlocksPruning::Some(n) | BlocksPruning::Delayed(n) => PruningMode::blocks_pruning(n),
};
let db_setting = DatabaseSettings {
trie_cache_maximum_size: Some(16 * 1024 * 1024),
@@ -1130,7 +1149,13 @@ impl<Block: BlockT> Backend<Block> {

let state_pruning_used = state_db.pruning_mode();
let is_archive_pruning = state_pruning_used.is_archive();
let blockchain = BlockchainDb::new(db.clone())?;
let delayed_pruning = if let BlocksPruning::Delayed(delayed_pruning) = config.blocks_pruning
{
Some(delayed_pruning)
} else {
None
};
let blockchain = BlockchainDb::new(db.clone(), delayed_pruning)?;

let storage_db =
StorageDb { db: db.clone(), state_db, prefix_keys: !db.supports_ref_counting() };
@@ -1674,18 +1699,33 @@ impl<Block: BlockT> Backend<Block> {
&self,
transaction: &mut Transaction<DbHash>,
f_header: &Block::Header,
f_hash: Block::Hash,
mut f_hash: Block::Hash,
displaced: &mut Option<FinalizationOutcome<Block::Hash, NumberFor<Block>>>,
with_state: bool,
) -> ClientResult<()> {
let f_num = *f_header.number();

let mut f_num = *f_header.number();
let lookup_key = utils::number_and_hash_to_lookup_key(f_num, f_hash)?;
if with_state {
transaction.set_from_vec(columns::META, meta_keys::FINALIZED_STATE, lookup_key.clone());
}
transaction.set_from_vec(columns::META, meta_keys::FINALIZED_BLOCK, lookup_key);

// Update the "finalized" number and hash for pruning of N - delay.
// This implies handling both cases:
// - pruning in the state-db via `canonicalize_block`
// - pruning in db via displaced leaves and `prune_blocks`
if let BlocksPruning::Delayed(delayed) = self.blocks_pruning {
// No blocks to prune in this window.
if f_num < delayed.into() {
return Ok(())
}

f_num = f_num.saturating_sub(delayed.into());
f_hash = self.blockchain.hash(f_num)?.ok_or_else(|| {
sp_blockchain::Error::UnknownBlock(format!("Unknown block number {}", f_num))
})?;
}

if sc_client_api::Backend::have_state_at(self, &f_hash, f_num) &&
self.storage
.state_db
@@ -1731,6 +1771,11 @@ impl<Block: BlockT> Backend<Block> {
BlocksPruning::KeepFinalized => {
self.prune_displaced_branches(transaction, finalized, displaced)?;
},
BlocksPruning::Delayed(_) => {
// Proper offset and valid displaced set of leaves provided by `note_finalized`.
self.prune_block(transaction, BlockId::<Block>::number(finalized))?;
self.prune_displaced_branches(transaction, finalized, displaced)?;
},
}
Ok(())
}
@@ -3791,4 +3836,165 @@ pub(crate) mod tests {
assert_eq!(backend.blockchain().leaves().unwrap(), vec![block2]);
assert_eq!(backend.blockchain().info().best_hash, block2);
}

#[test]
fn delayed_prune_blocks_on_finalize() {
let backend = Backend::<Block>::new_test_with_tx_storage(BlocksPruning::Delayed(2), 0);
let ext = Default::default();
let hash_0 =
insert_block(&backend, 0, Default::default(), None, ext, vec![0.into()], None).unwrap();
let hash_1 = insert_block(&backend, 1, hash_0, None, ext, vec![1.into()], None).unwrap();

// Block tree:
// 0 -> 1
let mut op = backend.begin_operation().unwrap();
backend.begin_state_operation(&mut op, BlockId::Hash(hash_1)).unwrap();
op.mark_finalized(BlockId::Hash(hash_0), None).unwrap();
op.mark_finalized(BlockId::Hash(hash_1), None).unwrap();
backend.commit_operation(op).unwrap();

let bc = backend.blockchain();
// Delayed pruning must keep both blocks around.
assert_eq!(Some(vec![0.into()]), bc.body(BlockId::hash(hash_0)).unwrap());
assert_eq!(Some(vec![1.into()]), bc.body(BlockId::hash(hash_1)).unwrap());

// Block tree:
// 0 -> 1 -> 2 -> 3
let hash_2 = insert_block(&backend, 2, hash_1, None, ext, vec![2.into()], None).unwrap();
let hash_3 = insert_block(&backend, 3, hash_2, None, ext, vec![3.into()], None).unwrap();

let mut op = backend.begin_operation().unwrap();
backend.begin_state_operation(&mut op, BlockId::Hash(hash_3)).unwrap();
op.mark_finalized(BlockId::Hash(hash_2), None).unwrap();
op.mark_finalized(BlockId::Hash(hash_3), None).unwrap();
backend.commit_operation(op).unwrap();

// Blocks 0 and 1 are pruned.
assert!(bc.body(BlockId::hash(hash_0)).unwrap().is_none());
assert!(bc.body(BlockId::hash(hash_1)).unwrap().is_none());

assert_eq!(Some(vec![2.into()]), bc.body(BlockId::hash(hash_2)).unwrap());
assert_eq!(Some(vec![3.into()]), bc.body(BlockId::hash(hash_3)).unwrap());
}

#[test]
fn delayed_prune_blocks_on_finalize_with_fork() {
let backend = Backend::<Block>::new_test_with_tx_storage(BlocksPruning::Delayed(2), 10);
let mut blocks = Vec::new();
let mut prev_hash = Default::default();

// Block tree:
// 0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 6
for i in 0..7 {
let hash = insert_block(
&backend,
i,
prev_hash,
None,
Default::default(),
vec![i.into()],
None,
)
.unwrap();
blocks.push(hash);
prev_hash = hash;
}

// Insert a fork at the third block.
// Block tree:
// 0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 6
// 2 -> 3
let fork_hash_root =
insert_block(&backend, 3, blocks[2], None, H256::random(), vec![31.into()], None)
.unwrap();

let mut op = backend.begin_operation().unwrap();
backend.begin_state_operation(&mut op, BlockId::hash(blocks[4])).unwrap();
op.mark_head(BlockId::hash(blocks[4])).unwrap();
backend.commit_operation(op).unwrap();

// Mark blocks 0, 1, 2 as finalized.
let mut op = backend.begin_operation().unwrap();
backend.begin_state_operation(&mut op, BlockId::hash(blocks[2])).unwrap();
op.mark_finalized(BlockId::hash(blocks[0]), None).unwrap();
op.mark_finalized(BlockId::hash(blocks[1]), None).unwrap();
op.mark_finalized(BlockId::hash(blocks[2]), None).unwrap();
backend.commit_operation(op).unwrap();

let bc = backend.blockchain();
// Block 0 is pruned.
assert!(bc.body(BlockId::hash(blocks[0])).unwrap().is_none());
assert_eq!(Some(vec![1.into()]), bc.body(BlockId::hash(blocks[1])).unwrap());
assert_eq!(Some(vec![2.into()]), bc.body(BlockId::hash(blocks[2])).unwrap());
assert_eq!(Some(vec![3.into()]), bc.body(BlockId::hash(blocks[3])).unwrap());
assert_eq!(Some(vec![4.into()]), bc.body(BlockId::hash(blocks[4])).unwrap());
assert_eq!(Some(vec![5.into()]), bc.body(BlockId::hash(blocks[5])).unwrap());
assert_eq!(Some(vec![6.into()]), bc.body(BlockId::hash(blocks[6])).unwrap());
assert_eq!(Some(vec![31.into()]), bc.body(BlockId::hash(fork_hash_root)).unwrap());

// Mark block 3 as finalized.
let mut op = backend.begin_operation().unwrap();
backend.begin_state_operation(&mut op, BlockId::hash(blocks[3])).unwrap();
op.mark_finalized(BlockId::hash(blocks[3]), None).unwrap();
backend.commit_operation(op).unwrap();

// Block 1 is pruned.
assert!(bc.body(BlockId::hash(blocks[1])).unwrap().is_none());
assert_eq!(Some(vec![2.into()]), bc.body(BlockId::hash(blocks[2])).unwrap());
assert_eq!(Some(vec![3.into()]), bc.body(BlockId::hash(blocks[3])).unwrap());
assert_eq!(Some(vec![4.into()]), bc.body(BlockId::hash(blocks[4])).unwrap());
assert_eq!(Some(vec![5.into()]), bc.body(BlockId::hash(blocks[5])).unwrap());
assert_eq!(Some(vec![6.into()]), bc.body(BlockId::hash(blocks[6])).unwrap());
assert_eq!(Some(vec![31.into()]), bc.body(BlockId::hash(fork_hash_root)).unwrap());

// Mark block 4 as finalized.
let mut op = backend.begin_operation().unwrap();
backend.begin_state_operation(&mut op, BlockId::hash(blocks[4])).unwrap();
op.mark_finalized(BlockId::hash(blocks[4]), None).unwrap();
backend.commit_operation(op).unwrap();

// Block 2 is pruned along with its fork.
assert!(bc.body(BlockId::hash(blocks[2])).unwrap().is_none());
assert_eq!(Some(vec![31.into()]), bc.body(BlockId::hash(fork_hash_root)).unwrap());
assert_eq!(Some(vec![3.into()]), bc.body(BlockId::hash(blocks[3])).unwrap());
assert_eq!(Some(vec![4.into()]), bc.body(BlockId::hash(blocks[4])).unwrap());
assert_eq!(Some(vec![5.into()]), bc.body(BlockId::hash(blocks[5])).unwrap());
assert_eq!(Some(vec![6.into()]), bc.body(BlockId::hash(blocks[6])).unwrap());

// Mark block 5 as finalized.
let mut op = backend.begin_operation().unwrap();
backend.begin_state_operation(&mut op, BlockId::hash(blocks[5])).unwrap();
op.mark_finalized(BlockId::hash(blocks[5]), None).unwrap();
backend.commit_operation(op).unwrap();

assert!(bc.body(BlockId::hash(blocks[3])).unwrap().is_none());
assert_eq!(Some(vec![31.into()]), bc.body(BlockId::hash(fork_hash_root)).unwrap());
assert_eq!(Some(vec![4.into()]), bc.body(BlockId::hash(blocks[4])).unwrap());
assert_eq!(Some(vec![5.into()]), bc.body(BlockId::hash(blocks[5])).unwrap());
assert_eq!(Some(vec![6.into()]), bc.body(BlockId::hash(blocks[6])).unwrap());

// Ensure the forked leaf 3 is properly stated here.
let displaced = backend.blockchain().displaced_leaves_after_finalizing(6).unwrap();
assert_eq!(1, displaced.len());
assert_eq!(fork_hash_root, displaced[0]);

// Mark block 6 as finalized.
// Because we delay prune by 2, when we finalize block 6 we are actually
// pruning at block 4. The displaced leaves for block 4 are computed
// at hight (block number - 1) = 3. This is the time when the fork
Contributor

Suggested change
// at hight (block number - 1) = 3. This is the time when the fork
// at height (block number - 1) = 3. This is the time when the fork

// is picked up for pruning.
let mut op = backend.begin_operation().unwrap();
backend.begin_state_operation(&mut op, BlockId::hash(blocks[6])).unwrap();
op.mark_finalized(BlockId::hash(blocks[6]), None).unwrap();
backend.commit_operation(op).unwrap();

assert!(bc.body(BlockId::hash(blocks[4])).unwrap().is_none());
assert!(bc.body(BlockId::hash(fork_hash_root)).unwrap().is_none());
assert_eq!(Some(vec![5.into()]), bc.body(BlockId::hash(blocks[5])).unwrap());
assert_eq!(Some(vec![6.into()]), bc.body(BlockId::hash(blocks[6])).unwrap());

// No leaves to report for theoretical node 7.
let displaced = backend.blockchain().displaced_leaves_after_finalizing(7).unwrap();
assert!(displaced.is_empty());
}
}
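To keep the arithmetic exercised by the tests above in one place, here is a small stand-alone sketch (not part of the PR) of the offset the delayed mode applies when a block is finalized; the `delayed_prune_target` helper is illustrative only and not part of the backend API.

// Minimal sketch: finalizing block `f_num` with a delay prunes at `f_num - delay`,
// and nothing is pruned while fewer than `delay` finalizations have happened.
fn delayed_prune_target(f_num: u64, delay: u64) -> Option<u64> {
    if f_num < delay {
        // Still inside the initial window: no blocks to prune yet.
        None
    } else {
        Some(f_num - delay)
    }
}

fn main() {
    // Mirrors the fork test: with a delay of 2, finalizing block 6 prunes at
    // block 4, and the displaced leaves are collected at height 4 - 1 = 3.
    assert_eq!(delayed_prune_target(6, 2), Some(4));
    assert_eq!(delayed_prune_target(3, 2), Some(1));
    assert_eq!(delayed_prune_target(1, 2), None);
}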