From 51f9f44a15198207b00980ed5e882b0cc5bc3367 Mon Sep 17 00:00:00 2001 From: Trisfald Date: Fri, 30 Aug 2024 12:09:37 +0200 Subject: [PATCH 01/36] fix typo --- core/store/src/trie/from_flat.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/store/src/trie/from_flat.rs b/core/store/src/trie/from_flat.rs index 71cf52684ea..575a338dc54 100644 --- a/core/store/src/trie/from_flat.rs +++ b/core/store/src/trie/from_flat.rs @@ -5,11 +5,11 @@ use std::time::Instant; // This function creates a new trie from flat storage for a given shard_uid // store: location of RocksDB store from where we read flatstore -// write_store: location of RocksDB store where we write the newly constructred trie +// write_store: location of RocksDB store where we write the newly constructed trie // shard_uid: The shard which we are recreating // // Please note that the trie is created for the block state with height equal to flat_head -// flat state can comtain deltas after flat_head and can be different from tip of the blockchain. +// flat state can contain deltas after flat_head and can be different from tip of the blockchain. pub fn construct_trie_from_flat(store: Store, write_store: Store, shard_uid: ShardUId) { let trie_storage = TrieDBStorage::new(store.clone(), shard_uid); let flat_state_to_trie_kv = From eb421de96e6f4090967858f126aef022db690524 Mon Sep 17 00:00:00 2001 From: Trisfald Date: Tue, 3 Sep 2024 10:52:18 +0200 Subject: [PATCH 02/36] skeleton for flat storage resharding V3 update --- chain/chain/src/flat_storage_creator.rs | 9 +++++- core/store/src/flat/types.rs | 39 +++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/chain/chain/src/flat_storage_creator.rs b/chain/chain/src/flat_storage_creator.rs index 70064dc908a..12a18998968 100644 --- a/chain/chain/src/flat_storage_creator.rs +++ b/chain/chain/src/flat_storage_creator.rs @@ -413,6 +413,9 @@ impl FlatStorageShardCreator { FlatStorageStatus::Disabled => { panic!("initiated flat storage creation for shard {shard_id} while it is disabled"); } + // If the flat storage is undergoing resharding it means it was created successfully + // and there's nothing else to do. + FlatStorageStatus::Resharding(_) => return Ok(true), }; Ok(false) } @@ -498,6 +501,9 @@ impl FlatStorageCreator { ); } FlatStorageStatus::Disabled => {} + FlatStorageStatus::Resharding(_) => { + todo!("resume resharding of flat storage") + } } } @@ -527,7 +533,8 @@ impl FlatStorageCreator { } FlatStorageStatus::Empty | FlatStorageStatus::Creation(_) - | FlatStorageStatus::Disabled => { + | FlatStorageStatus::Disabled + | FlatStorageStatus::Resharding(_) => { // The flat storage for children shards will be created // separately in the resharding process. } diff --git a/core/store/src/flat/types.rs b/core/store/src/flat/types.rs index 5397d8c6541..e4f80899b33 100644 --- a/core/store/src/flat/types.rs +++ b/core/store/src/flat/types.rs @@ -65,6 +65,8 @@ pub enum FlatStorageStatus { Creation(FlatStorageCreationStatus), /// Flat Storage is ready to be used. Ready(FlatStorageReadyStatus), + /// Flat storage is undergoing resharding. + Resharding(FlatStorageReshardingStatus), } impl Into for &FlatStorageStatus { @@ -81,6 +83,12 @@ impl Into for &FlatStorageStatus { FlatStorageCreationStatus::FetchingState(_) => 11, FlatStorageCreationStatus::CatchingUp(_) => 12, }, + // 20..30 is reserved for resharding statuses. 
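// (Illustrative aside, ours rather than part of the patch: the decade grouping
// makes the exported integer easy to classify downstream. A dashboard can treat
// any value in 20..30 as "resharding in progress" with a plain range check, so
// each new resharding phase should claim the next free value below 30.)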
+ FlatStorageStatus::Resharding(resharding_status) => match resharding_status { + FlatStorageReshardingStatus::SplittingParent => 20, + FlatStorageReshardingStatus::CreatingChild => 21, + FlatStorageReshardingStatus::CatchingUp(_) => 22, + }, } } } @@ -126,6 +134,37 @@ pub enum FlatStorageCreationStatus { CatchingUp(CryptoHash), } +/// This struct represents what is the current status of flat storage resharding. +/// During resharding flat storage must be changed to reflect the new shard layout. +/// +/// When two shards are split, the parent shard disappears and two children are created. The flat storage +/// entries that belonged to the parent must be copied in one of the two shards. This operation happens in the +/// background and could take significant time. +/// After all elements have been copied the new flat storages will be behind the chain head. To remediate this issue +/// they will enter a catching up phase. The parent shard, instead, must be removed and cleaned up. +#[derive( + BorshSerialize, + BorshDeserialize, + Copy, + Clone, + Debug, + PartialEq, + Eq, + serde::Serialize, + ProtocolSchema, +)] +pub enum FlatStorageReshardingStatus { + /// Resharding phase entered when a shard is being split. + /// Copy key-value pairs from this shard (the parent) to children shards. + SplittingParent, + /// Resharding phase entered when a shard is being split. + /// This shard (child) is being built from state taken from its parent. + CreatingChild, + /// We apply deltas from disk until the head reaches final head. + /// Includes block hash of flat storage head. + CatchingUp(CryptoHash), +} + /// Current step of fetching state to fill flat storage. #[derive( BorshSerialize, From c1a71a67a68eedacc0fc9f0101789cf2b28f62f2 Mon Sep 17 00:00:00 2001 From: Trisfald Date: Tue, 3 Sep 2024 12:26:57 +0200 Subject: [PATCH 03/36] skeleton of flat storage resharder --- chain/chain/src/flat_storage_creator.rs | 11 +++++++---- chain/chain/src/flat_storage_resharder.rs | 20 ++++++++++++++++++++ chain/chain/src/lib.rs | 1 + 3 files changed, 28 insertions(+), 4 deletions(-) create mode 100644 chain/chain/src/flat_storage_resharder.rs diff --git a/chain/chain/src/flat_storage_creator.rs b/chain/chain/src/flat_storage_creator.rs index 12a18998968..bfd9f670f30 100644 --- a/chain/chain/src/flat_storage_creator.rs +++ b/chain/chain/src/flat_storage_creator.rs @@ -413,9 +413,12 @@ impl FlatStorageShardCreator { FlatStorageStatus::Disabled => { panic!("initiated flat storage creation for shard {shard_id} while it is disabled"); } - // If the flat storage is undergoing resharding it means it was created successfully - // and there's nothing else to do. - FlatStorageStatus::Resharding(_) => return Ok(true), + // If the flat storage is undergoing resharding it means it was previously created successfully, + // but resharding itself hasn't been finished and must be resumed + FlatStorageStatus::Resharding(_) => { + todo!("create FlatStorageResharder"); + return Ok(true); + } }; Ok(false) } @@ -502,7 +505,7 @@ impl FlatStorageCreator { } FlatStorageStatus::Disabled => {} FlatStorageStatus::Resharding(_) => { - todo!("resume resharding of flat storage") + todo!("create FlatStorageResharder"); } } } diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs new file mode 100644 index 00000000000..f1c79112720 --- /dev/null +++ b/chain/chain/src/flat_storage_resharder.rs @@ -0,0 +1,20 @@ +//! Logic for resharding flat storage in parallel to chain processing. +//! +//! 
See [FlatStorageReshard] for more details about how the resharding takes place. + +/// `FlatStorageReshard` takes care of updating flat storage when a resharding event +/// happens. +/// +/// On an high level, the operations supported are: +/// - #### Shard splitting +/// Parent shard must be split into two children. The entire operation freezes the flat storage +/// for the involved shards. +/// Children shards are created empty and the key-values of the parent will be copied into one of them, +/// in the background. +/// +/// After the copy is finished the children shard will have the correct state at some past block height. +/// It'll be necessary to perform catchup before the flat storage can be put again in Ready state. +/// The parent shard storage is not needed anymore and can be removed. +pub struct FlatStorageReshard { + // TODO +} diff --git a/chain/chain/src/lib.rs b/chain/chain/src/lib.rs index d296f721a5d..e7b066b3b50 100644 --- a/chain/chain/src/lib.rs +++ b/chain/chain/src/lib.rs @@ -19,6 +19,7 @@ pub mod chunks_store; pub mod crypto_hash_timer; mod doomslug; pub mod flat_storage_creator; +pub mod flat_storage_resharder; mod garbage_collection; mod lightclient; pub mod metrics; From 923416082c07c89e3471e3f742249133d1b411af Mon Sep 17 00:00:00 2001 From: Trisfald Date: Tue, 3 Sep 2024 15:10:30 +0200 Subject: [PATCH 04/36] add TODO details --- chain/chain/src/flat_storage_resharder.rs | 16 +++++++++++++++- core/store/src/flat/types.rs | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index f1c79112720..fbaef5fddbd 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -2,6 +2,13 @@ //! //! See [FlatStorageReshard] for more details about how the resharding takes place. +use std::sync::Arc; + +use near_epoch_manager::EpochManagerAdapter; +use near_primitives::types::BlockHeight; + +use crate::types::RuntimeAdapter; + /// `FlatStorageReshard` takes care of updating flat storage when a resharding event /// happens. /// @@ -16,5 +23,12 @@ /// It'll be necessary to perform catchup before the flat storage can be put again in Ready state. /// The parent shard storage is not needed anymore and can be removed. pub struct FlatStorageReshard { - // TODO + /// Height on top of which this struct was created. + start_height: BlockHeight, + epoch_manager: Arc, + runtime: Arc, + // TODO(Trisfald) + // add shard_uid parent, children + // add metrics + // add object to hold intermediate state } diff --git a/core/store/src/flat/types.rs b/core/store/src/flat/types.rs index e4f80899b33..bbef21399cd 100644 --- a/core/store/src/flat/types.rs +++ b/core/store/src/flat/types.rs @@ -156,6 +156,7 @@ pub enum FlatStorageCreationStatus { pub enum FlatStorageReshardingStatus { /// Resharding phase entered when a shard is being split. /// Copy key-value pairs from this shard (the parent) to children shards. + /// TODO(Trisfald): probably store latest status of splitting operation SplittingParent, /// Resharding phase entered when a shard is being split. /// This shard (child) is being built from state taken from its parent. 
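Everything above hinges on the fact that this status is persisted per shard: after a restart, whatever is found on disk tells the node which phase to re-enter. A minimal sketch of such a startup check, using the `store_helper` accessors that appear later in this series (the wrapper function itself is ours, for illustration only):

use near_store::flat::{store_helper, FlatStorageStatus};
use near_store::{ShardUId, Store};

/// Illustrative helper: did a previous run leave this shard mid-resharding?
fn was_resharding_interrupted(store: &Store, shard_uid: ShardUId) -> bool {
    matches!(
        store_helper::get_flat_storage_status(store, shard_uid),
        Ok(FlatStorageStatus::Resharding(_))
    )
}

This is exactly the dispatch that the `FlatStorageStatus::Resharding(_)` arms added to the creator above prepare for.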
From c39b4766ae2252a5af0e3f954eaa89da78fa5c45 Mon Sep 17 00:00:00 2001 From: Trisfald Date: Fri, 20 Sep 2024 12:21:48 +0200 Subject: [PATCH 05/36] add API stub to resume or start resharding of flat storage --- chain/chain/src/flat_storage_creator.rs | 14 +++-- chain/chain/src/flat_storage_resharder.rs | 74 ++++++++++++++++++++--- chain/client/src/client.rs | 7 +++ core/primitives/src/errors.rs | 3 + core/store/src/flat/mod.rs | 2 +- core/store/src/flat/types.rs | 18 ++---- 6 files changed, 93 insertions(+), 25 deletions(-) diff --git a/chain/chain/src/flat_storage_creator.rs b/chain/chain/src/flat_storage_creator.rs index bfd9f670f30..080b3d7d39f 100644 --- a/chain/chain/src/flat_storage_creator.rs +++ b/chain/chain/src/flat_storage_creator.rs @@ -9,6 +9,7 @@ //! `CatchingUp`: moves flat storage head forward, so it may reach chain final head. //! `Ready`: flat storage is created and it is up-to-date. +use crate::flat_storage_resharder::FlatStorageResharder; use crate::types::RuntimeAdapter; use crate::{ChainStore, ChainStoreAccess}; use assert_matches::assert_matches; @@ -414,9 +415,9 @@ impl FlatStorageShardCreator { panic!("initiated flat storage creation for shard {shard_id} while it is disabled"); } // If the flat storage is undergoing resharding it means it was previously created successfully, - // but resharding itself hasn't been finished and must be resumed + // but resharding itself hasn't been finished. + // This case is a no-op because the flat storage resharder has already been created in `create_flat_storage_for_current_epoch`. FlatStorageStatus::Resharding(_) => { - todo!("create FlatStorageResharder"); return Ok(true); } }; @@ -434,10 +435,13 @@ pub struct FlatStorageCreator { impl FlatStorageCreator { /// For each of tracked shards, either creates flat storage if it is already stored on DB, /// or starts migration to flat storage which updates DB in background and creates flat storage afterwards. + /// + /// Also resumes any resharding operation which was already in progress. pub fn new( epoch_manager: Arc, runtime: Arc, chain_store: &ChainStore, + flat_storage_resharder: &FlatStorageResharder, num_threads: usize, ) -> Result, Error> { let flat_storage_manager = runtime.get_flat_storage_manager(); @@ -451,6 +455,7 @@ impl FlatStorageCreator { &epoch_manager, &flat_storage_manager, &runtime, + &flat_storage_resharder, )?; // Create flat storage for the shards in the next epoch. This only @@ -478,6 +483,7 @@ impl FlatStorageCreator { epoch_manager: &Arc, flat_storage_manager: &FlatStorageManager, runtime: &Arc, + flat_storage_resharder: &FlatStorageResharder, ) -> Result, Error> { let epoch_id = &chain_head.epoch_id; tracing::debug!(target: "store", ?epoch_id, "creating flat storage for the current epoch"); @@ -504,8 +510,8 @@ impl FlatStorageCreator { ); } FlatStorageStatus::Disabled => {} - FlatStorageStatus::Resharding(_) => { - todo!("create FlatStorageResharder"); + FlatStorageStatus::Resharding(status) => { + flat_storage_resharder.resume(&shard_uid, &status)?; } } } diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index fbaef5fddbd..aed6ec8d434 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -1,15 +1,16 @@ //! Logic for resharding flat storage in parallel to chain processing. //! -//! See [FlatStorageReshard] for more details about how the resharding takes place. +//! See [FlatStorageResharder] for more details about how the resharding takes place. 
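// A sketch of the status transitions this module is being built to drive,
// pieced together from the doc comments in this series (the exact variants
// still evolve in later patches):
//
//   parent: Ready -> Resharding(SplittingParent) -> key-values copied -> cleaned up
//   child:  created -> Resharding(CreatingChild) -> Resharding(CatchingUp(hash)) -> Ready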
-use std::sync::Arc; +use std::sync::{Arc, Mutex}; use near_epoch_manager::EpochManagerAdapter; -use near_primitives::types::BlockHeight; +use near_store::{flat::FlatStorageReshardingStatus, ShardUId, StorageError}; +use tracing::{error, info}; use crate::types::RuntimeAdapter; -/// `FlatStorageReshard` takes care of updating flat storage when a resharding event +/// `FlatStorageResharder` takes care of updating flat storage when a resharding event /// happens. /// /// On an high level, the operations supported are: @@ -22,11 +23,70 @@ use crate::types::RuntimeAdapter; /// After the copy is finished the children shard will have the correct state at some past block height. /// It'll be necessary to perform catchup before the flat storage can be put again in Ready state. /// The parent shard storage is not needed anymore and can be removed. -pub struct FlatStorageReshard { - /// Height on top of which this struct was created. - start_height: BlockHeight, +pub struct FlatStorageResharder { epoch_manager: Arc, runtime: Arc, + resharding_event: Mutex>, +} + +impl FlatStorageResharder { + /// Creates a new `FlatStorageResharder`. + pub fn new( + epoch_manager: Arc, + runtime: Arc, + ) -> Self { + let resharding_event = Mutex::new(None); + Self { epoch_manager, runtime, resharding_event } + } + + /// Resumes a resharding operation that was in progress. + pub fn resume( + &self, + shard_uid: &ShardUId, + status: &FlatStorageReshardingStatus, + ) -> Result<(), StorageError> { + match status { + FlatStorageReshardingStatus::CreatingChild => { + // Nothing to do here because the parent will take care of resuming work. + } + FlatStorageReshardingStatus::SplittingParent(_) => { + let parent_shard_uid = shard_uid; + info!(target: "resharding", ?parent_shard_uid, "resuming flat storage resharding"); + self.check_no_resharding_in_progress()?; + // On resume flat storage status is already set. + // TODO(trisfald): create flat storage resharding event + } + FlatStorageReshardingStatus::CatchingUp(_) => { + // TODO(trisfald) + todo!() + } + } + Ok(()) + } + + /// Starts the operation of splitting a parent shard flat storage into two children. + pub fn split_shard(&self, parent_shard_uid: &ShardUId) -> Result<(), StorageError> { + info!(target: "resharding", ?parent_shard_uid, "initiating flat storage split"); + self.check_no_resharding_in_progress()?; + // Change parent shard flat storage status. + + // todo(Trisfald): create flat storage resharding event + Ok(()) + } + + fn check_no_resharding_in_progress(&self) -> Result<(), StorageError> { + // Do not allow multiple resharding operations in parallel. + if self.resharding_event.lock().unwrap().is_some() { + error!(target: "resharding", "trying to start a new flat storage resharding operation while one is already in progress!"); + Err(StorageError::FlatStorageReshardingAlreadyInProgress) + } else { + Ok(()) + } + } +} + +/// Struct to describe, perform and track progress of a flat storage resharding. 
pub struct FlatStorageReshardingEvent {
    // TODO(Trisfald)
    // add shard_uid parent, children
    // add metrics
    // add object to hold intermediate state
}
diff --git a/chain/client/src/client.rs b/chain/client/src/client.rs
index 943630212ac..0e067166eef 100644
--- a/chain/client/src/client.rs
+++ b/chain/client/src/client.rs
@@ -25,6 +25,7 @@ use near_chain::chain::{
    BlocksCatchUpState, LoadMemtrieRequest, VerifyBlockHashAndSignatureResult,
};
use near_chain::flat_storage_creator::FlatStorageCreator;
+use near_chain::flat_storage_resharder::FlatStorageResharder;
use near_chain::orphan::OrphanMissingChunks;
use near_chain::state_snapshot_actor::SnapshotCallbacks;
use near_chain::test_utils::format_hash;
@@ -176,6 +177,8 @@ pub struct Client {
    /// Cached precomputed set of TIER1 accounts.
    /// See send_network_chain_info().
    tier1_accounts_cache: Option<(EpochId, Arc<AccountKeys>)>,
+    /// Takes care of performing resharding on the flat storage.
+    flat_storage_resharder: FlatStorageResharder,
    /// Used when it is needed to create flat storage in background for some shards.
    flat_storage_creator: Option<FlatStorageCreator>,
    /// A map storing the last time a block was requested for state sync.
@@ -269,11 +272,14 @@ impl Client {
            async_computation_spawner.clone(),
            validator_signer.clone(),
        )?;
+        let flat_storage_resharder =
+            FlatStorageResharder::new(epoch_manager.clone(), runtime_adapter.clone());
        // Create flat storage or initiate migration to flat storage.
        let flat_storage_creator = FlatStorageCreator::new(
            epoch_manager.clone(),
            runtime_adapter.clone(),
            chain.chain_store(),
+            &flat_storage_resharder,
            chain_config.background_migration_threads,
        )?;
        let sharded_tx_pool =
@@ -397,6 +403,7 @@
                NonZeroUsize::new(PRODUCTION_TIMES_CACHE_SIZE).unwrap(),
            ),
            tier1_accounts_cache: None,
+            flat_storage_resharder,
            flat_storage_creator,
            last_time_sync_block_requested: HashMap::new(),
            chunk_validator,
diff --git a/core/primitives/src/errors.rs b/core/primitives/src/errors.rs
index 33d9699de7b..01fffbebe8a 100644
--- a/core/primitives/src/errors.rs
+++ b/core/primitives/src/errors.rs
@@ -133,6 +133,9 @@ pub enum StorageError {
    FlatStorageBlockNotSupported(String),
    /// In-memory trie could not be loaded for some reason.
    MemTrieLoadingError(String),
+    /// Indicates that a resharding operation on flat storage is already in progress,
+    /// when it wasn't expected to be so.
+ FlatStorageReshardingAlreadyInProgress, } impl std::fmt::Display for StorageError { diff --git a/core/store/src/flat/mod.rs b/core/store/src/flat/mod.rs index d195bf7e3ee..d50a2f1f1e1 100644 --- a/core/store/src/flat/mod.rs +++ b/core/store/src/flat/mod.rs @@ -44,7 +44,7 @@ pub use metrics::FlatStorageCreationMetrics; pub use storage::FlatStorage; pub use types::{ BlockInfo, FetchingStateStatus, FlatStateIterator, FlatStorageCreationStatus, FlatStorageError, - FlatStorageReadyStatus, FlatStorageStatus, + FlatStorageReadyStatus, FlatStorageReshardingStatus, FlatStorageStatus, }; pub(crate) const POISONED_LOCK_ERR: &str = "The lock was poisoned."; diff --git a/core/store/src/flat/types.rs b/core/store/src/flat/types.rs index bbef21399cd..3a2808f7e31 100644 --- a/core/store/src/flat/types.rs +++ b/core/store/src/flat/types.rs @@ -2,7 +2,7 @@ use borsh::{BorshDeserialize, BorshSerialize}; use near_primitives::errors::StorageError; use near_primitives::hash::CryptoHash; use near_primitives::state::FlatStateValue; -use near_primitives::types::BlockHeight; +use near_primitives::types::{AccountId, BlockHeight}; use near_schema_checker_lib::ProtocolSchema; #[derive(BorshSerialize, BorshDeserialize, Debug, Copy, Clone, PartialEq, Eq, serde::Serialize)] @@ -85,7 +85,7 @@ impl Into for &FlatStorageStatus { }, // 20..30 is reserved for resharding statuses. FlatStorageStatus::Resharding(resharding_status) => match resharding_status { - FlatStorageReshardingStatus::SplittingParent => 20, + FlatStorageReshardingStatus::SplittingParent(_) => 20, FlatStorageReshardingStatus::CreatingChild => 21, FlatStorageReshardingStatus::CatchingUp(_) => 22, }, @@ -143,21 +143,13 @@ pub enum FlatStorageCreationStatus { /// After all elements have been copied the new flat storages will be behind the chain head. To remediate this issue /// they will enter a catching up phase. The parent shard, instead, must be removed and cleaned up. #[derive( - BorshSerialize, - BorshDeserialize, - Copy, - Clone, - Debug, - PartialEq, - Eq, - serde::Serialize, - ProtocolSchema, + BorshSerialize, BorshDeserialize, Clone, Debug, PartialEq, Eq, serde::Serialize, ProtocolSchema, )] pub enum FlatStorageReshardingStatus { /// Resharding phase entered when a shard is being split. /// Copy key-value pairs from this shard (the parent) to children shards. - /// TODO(Trisfald): probably store latest status of splitting operation - SplittingParent, + /// Includes the latest AccountID moved from parent to child. + SplittingParent(Option), /// Resharding phase entered when a shard is being split. /// This shard (child) is being built from state taken from its parent. 
CreatingChild, From f9773266e29141b32bea8dc9c49071b9ff1cdd3d Mon Sep 17 00:00:00 2001 From: Trisfald Date: Tue, 24 Sep 2024 19:14:02 +0200 Subject: [PATCH 06/36] flat storage resharding start fn --- chain/chain/src/flat_storage_resharder.rs | 294 +++++++++++++++++++--- chain/client/src/client.rs | 5 +- core/primitives/src/shard_layout.rs | 33 ++- core/store/src/flat/mod.rs | 2 +- core/store/src/flat/types.rs | 19 +- 5 files changed, 316 insertions(+), 37 deletions(-) diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index aed6ec8d434..dfee4a10996 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -4,16 +4,20 @@ use std::sync::{Arc, Mutex}; -use near_epoch_manager::EpochManagerAdapter; -use near_store::{flat::FlatStorageReshardingStatus, ShardUId, StorageError}; -use tracing::{error, info}; +use near_chain_primitives::Error; +use near_primitives::shard_layout::ShardLayout; +use near_store::{ + flat::{store_helper, FlatStorageReshardingStatus, FlatStorageStatus, SplittingParentStatus}, + ShardUId, StorageError, +}; +use tracing::{debug, error, info}; use crate::types::RuntimeAdapter; /// `FlatStorageResharder` takes care of updating flat storage when a resharding event /// happens. /// -/// On an high level, the operations supported are: +/// On an high level, the events supported are: /// - #### Shard splitting /// Parent shard must be split into two children. The entire operation freezes the flat storage /// for the involved shards. @@ -24,22 +28,18 @@ use crate::types::RuntimeAdapter; /// It'll be necessary to perform catchup before the flat storage can be put again in Ready state. /// The parent shard storage is not needed anymore and can be removed. pub struct FlatStorageResharder { - epoch_manager: Arc, runtime: Arc, - resharding_event: Mutex>, + resharding_event: Mutex>, } impl FlatStorageResharder { /// Creates a new `FlatStorageResharder`. - pub fn new( - epoch_manager: Arc, - runtime: Arc, - ) -> Self { + pub fn new(runtime: Arc) -> Self { let resharding_event = Mutex::new(None); - Self { epoch_manager, runtime, resharding_event } + Self { runtime, resharding_event } } - /// Resumes a resharding operation that was in progress. + /// Resumes a resharding event that was in progress. pub fn resume( &self, shard_uid: &ShardUId, @@ -49,46 +49,284 @@ impl FlatStorageResharder { FlatStorageReshardingStatus::CreatingChild => { // Nothing to do here because the parent will take care of resuming work. } - FlatStorageReshardingStatus::SplittingParent(_) => { + FlatStorageReshardingStatus::SplittingParent(status) => { let parent_shard_uid = shard_uid; - info!(target: "resharding", ?parent_shard_uid, "resuming flat storage resharding"); + info!(target: "resharding", ?parent_shard_uid, ?status, "resuming flat storage resharding"); self.check_no_resharding_in_progress()?; // On resume flat storage status is already set. - // TODO(trisfald): create flat storage resharding event + self.split_shard_impl(*parent_shard_uid, &status); } FlatStorageReshardingStatus::CatchingUp(_) => { - // TODO(trisfald) + // TODO(Trisfald): implement catch up todo!() } } Ok(()) } - /// Starts the operation of splitting a parent shard flat storage into two children. - pub fn split_shard(&self, parent_shard_uid: &ShardUId) -> Result<(), StorageError> { - info!(target: "resharding", ?parent_shard_uid, "initiating flat storage split"); + /// Starts a resharding event deduced from the new shard layout provided. 
+ /// + /// For now, only splitting a shard is supported. + pub fn start_resharding_from_new_shard_layout( + &self, + shard_layout: &ShardLayout, + ) -> Result<(), Error> { + match Self::event_params_from_shard_layout(&shard_layout)? { + ReshardingEventParams::Split(parent_shard, left_child_shard, right_child_shard) => { + self.split_shard(parent_shard, left_child_shard, right_child_shard, shard_layout) + } + } + } + + /// Starts the event of splitting a parent shard flat storage into two children. + pub fn split_shard( + &self, + parent_shard: ShardUId, + left_child_shard: ShardUId, + right_child_shard: ShardUId, + shard_layout: &ShardLayout, + ) -> Result<(), Error> { + info!(target: "resharding", ?parent_shard, ?left_child_shard, ?right_child_shard, "initiating flat storage split"); self.check_no_resharding_in_progress()?; - // Change parent shard flat storage status. - // todo(Trisfald): create flat storage resharding event + // Change parent and children shards flat storage status. + // TODO(trisfald): add metrics + let mut store_update = self.runtime.store().store_update(); + let status = SplittingParentStatus { + left_child_shard, + right_child_shard, + shard_layout: shard_layout.clone(), + latest_account_moved: None, + }; + store_helper::set_flat_storage_status( + &mut store_update, + parent_shard, + FlatStorageStatus::Resharding(FlatStorageReshardingStatus::SplittingParent( + status.clone(), + )), + ); + store_helper::set_flat_storage_status( + &mut store_update, + left_child_shard, + FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CreatingChild), + ); + store_helper::set_flat_storage_status( + &mut store_update, + right_child_shard, + FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CreatingChild), + ); + store_update.commit()?; + + self.split_shard_impl(parent_shard, &status); Ok(()) } fn check_no_resharding_in_progress(&self) -> Result<(), StorageError> { - // Do not allow multiple resharding operations in parallel. + // Do not allow multiple resharding events in parallel. if self.resharding_event.lock().unwrap().is_some() { - error!(target: "resharding", "trying to start a new flat storage resharding operation while one is already in progress!"); + error!(target: "resharding", "trying to start a new flat storage resharding event while one is already in progress!"); Err(StorageError::FlatStorageReshardingAlreadyInProgress) } else { Ok(()) } } + + fn set_resharding_event(&self, event: ReshardingEvent) { + *self.resharding_event.lock().unwrap() = Some(event); + } + + /// Returns the current in-progress resharding event, if any. + pub fn resharding_event(&self) -> Option { + self.resharding_event.lock().unwrap().clone() + } + + fn split_shard_impl(&self, parent_shard: ShardUId, status: &SplittingParentStatus) { + let event = ReshardingEvent::Split(parent_shard, status.clone()); + self.set_resharding_event(event); + debug!(target: "resharding", ?parent_shard, "starting flat storage split: copy of key-value pairs"); + + // TODO(Trisfald): implement copy of keys from parent to children + } + + fn event_params_from_shard_layout( + shard_layout: &ShardLayout, + ) -> Result { + // Resharder supports shard layout V2 onwards. + match shard_layout { + ShardLayout::V0(_) | ShardLayout::V1(_) => { + error!(target: "resharding", ?shard_layout, "unsupported shard layout!"); + return Err(Error::Other( + "flat storage resharding: unsupported shard layout".to_string(), + )); + } + } + + // Look for a shard having exactly two children, to trigger a split. 
+ for shard in shard_layout.shard_ids() { + if let Ok(parent) = shard_layout.get_parent_shard_id(shard) { + if let Some(children) = shard_layout.get_children_shards_uids(parent) { + if children.len() == 2 { + return Ok(ReshardingEventParams::Split( + ShardUId::from_shard_id_and_layout(parent, &shard_layout), + children[0], + children[1], + )); + } + } + } + } + error!(target: "resharding", ?shard_layout, "no supported shard layout change found!"); + Err(Error::Other("flat storage resharding: shard layout doesn't contain any supported shard layout change".to_string())) + } +} + +/// Struct used to destructure a new shard layout definition into the resulting resharding event. +#[cfg_attr(test, derive(PartialEq, Eq))] +enum ReshardingEventParams { + /// Split a shard. + /// Includes: `parent_shard`, `left_child_shard` and `right_child_shard`. + Split(ShardUId, ShardUId, ShardUId), } /// Struct to describe, perform and track progress of a flat storage resharding. -pub struct FlatStorageReshardingEvent { - // TODO(Trisfald) - // add shard_uid parent, children - // add metrics - // add object to hold intermediate state +#[derive(Clone, Debug)] +pub enum ReshardingEvent { + /// Split a shard. + /// Includes the parent shard uid and the operation' status. + Split(ShardUId, SplittingParentStatus), +} + +#[cfg(test)] +mod tests { + use near_async::time::Clock; + use near_chain_configs::Genesis; + use near_epoch_manager::EpochManager; + use near_o11y::testonly::init_test_logger; + use near_primitives::shard_layout::ShardLayout; + use near_store::{genesis::initialize_genesis_state, test_utils::create_test_store}; + + use crate::runtime::NightshadeRuntime; + + use super::*; + + /// Shorthand to create account ID. + macro_rules! account { + ($str:expr) => { + $str.parse().unwrap() + }; + } + + /// Simple shard layout with two shards. + fn simple_shard_layout() -> ShardLayout { + // TODO(Trisfald): use shard layout v2 + ShardLayout::v1(vec![account!("ff")], None, 3) + } + + /// Derived from [simple_shard_layout] by splitting the second shard. + fn shard_layout_after_split() -> ShardLayout { + // TODO(Trisfald): use shard layout v2 + ShardLayout::v1(vec![account!("ff"), account!("pp")], Some(vec![vec![0], vec![1, 2]]), 3) + } + + /// Generic test setup. + fn create_fs_resharder(shard_layout: ShardLayout) -> FlatStorageResharder { + let num_shards = shard_layout.shard_ids().count(); + let genesis = Genesis::test_with_seeds( + Clock::real(), + vec![account!("aa"), account!("mm"), account!("vv")], + 1, + vec![1; num_shards], + shard_layout, + ); + let tempdir = tempfile::tempdir().unwrap(); + let store = create_test_store(); + initialize_genesis_state(store.clone(), &genesis, Some(tempdir.path())); + let epoch_manager = EpochManager::new_arc_handle(store.clone(), &genesis.config); + let runtime = NightshadeRuntime::test( + tempdir.path(), + store.clone(), + &genesis.config, + epoch_manager.clone(), + ); + FlatStorageResharder::new(runtime) + } + + /// Verify that the correct type of resharding is deduced from a new shard layout. + #[test] + fn parse_event_params_from_shard_layout() { + // No resharding specified. + assert!( + FlatStorageResharder::event_params_from_shard_layout(&simple_shard_layout()).is_err() + ); + + // Shard layouts V0 and V1 are rejected. + assert!(FlatStorageResharder::event_params_from_shard_layout( + &ShardLayout::v0_single_shard() + ) + .is_err()); + assert!( + FlatStorageResharder::event_params_from_shard_layout(&ShardLayout::v1_test()).is_err() + ); + + // Split shard. 
+ { + let layout = shard_layout_after_split(); + let _event_params = FlatStorageResharder::event_params_from_shard_layout(&layout); + + // TODO(Trisfald): it won't work until we have shard layout v2. + // assert_eq!(event_params, ReshardingEventParams::Split(...)); + } + } + + /// Verify that another resharding can't be triggered if one is ongoing. + #[test] + fn concurrent_reshardings_are_disallowed() { + init_test_logger(); + let _resharder = create_fs_resharder(simple_shard_layout()); + let _new_shard_layout = shard_layout_after_split(); + + // TODO(Trisfald): it won't work until we have shard layout v2. + + // assert!(resharder.start_resharding_from_new_shard_layout(&new_shard_layout).is_ok()); + // assert!(resharder.resharding_event.lock().unwrap().is_some()); + // assert!(resharder.start_resharding_from_new_shard_layout(&new_shard_layout).is_err()); + } + + /// Flat storage shard status should be set correctly upon starting a shard split. + #[test] + fn flat_storage_split_status_set() { + init_test_logger(); + let resharder = create_fs_resharder(simple_shard_layout()); + let _new_shard_layout = shard_layout_after_split(); + + // TODO(Trisfald): it won't work until we have shard layout v2. + + // assert!(resharder.start_resharding_from_new_shard_layout(&new_shard_layout).is_ok()); + + let resharding_event = resharder.resharding_event(); + match resharding_event.unwrap() { + ReshardingEvent::Split(parent, status) => { + assert_eq!( + store_helper::get_flat_storage_status(resharder.runtime.store(), parent), + Ok(FlatStorageStatus::Resharding( + FlatStorageReshardingStatus::SplittingParent(status.clone()) + )) + ); + assert_eq!( + store_helper::get_flat_storage_status( + resharder.runtime.store(), + status.left_child_shard + ), + Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CreatingChild)) + ); + assert_eq!( + store_helper::get_flat_storage_status( + resharder.runtime.store(), + status.right_child_shard + ), + Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CreatingChild)) + ); + } + } + } } diff --git a/chain/client/src/client.rs b/chain/client/src/client.rs index 0e067166eef..24dda8a6b09 100644 --- a/chain/client/src/client.rs +++ b/chain/client/src/client.rs @@ -178,7 +178,7 @@ pub struct Client { /// See send_network_chain_info(). tier1_accounts_cache: Option<(EpochId, Arc)>, /// Takes care of performing resharding on the flat storage. - flat_storage_resharder: FlatStorageResharder, + pub flat_storage_resharder: FlatStorageResharder, /// Used when it is needed to create flat storage in background for some shards. flat_storage_creator: Option, /// A map storing the last time a block was requested for state sync. @@ -272,8 +272,7 @@ impl Client { async_computation_spawner.clone(), validator_signer.clone(), )?; - let flat_storage_resharder = - FlatStorageResharder::new(epoch_manager.clone(), runtime_adapter.clone()); + let flat_storage_resharder = FlatStorageResharder::new(runtime_adapter.clone()); // Create flat storage or initiate migration to flat storage. 
let flat_storage_creator = FlatStorageCreator::new( epoch_manager.clone(), diff --git a/core/primitives/src/shard_layout.rs b/core/primitives/src/shard_layout.rs index f15ab81b10d..64a42ed07c0 100644 --- a/core/primitives/src/shard_layout.rs +++ b/core/primitives/src/shard_layout.rs @@ -48,7 +48,16 @@ use std::{fmt, str}; pub type ShardVersion = u32; -#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] +#[derive( + BorshSerialize, + BorshDeserialize, + serde::Serialize, + serde::Deserialize, + Clone, + Debug, + PartialEq, + Eq, +)] pub enum ShardLayout { V0(ShardLayoutV0), V1(ShardLayoutV1), @@ -59,7 +68,16 @@ pub enum ShardLayout { /// to keep backward compatibility for some existing tests. /// `parent_shards` for `ShardLayoutV1` is always `None`, meaning it can only be the first shard layout /// a chain uses. -#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] +#[derive( + BorshSerialize, + BorshDeserialize, + serde::Serialize, + serde::Deserialize, + Clone, + Debug, + PartialEq, + Eq, +)] pub struct ShardLayoutV0 { /// Map accounts evenly across all shards num_shards: NumShards, @@ -73,7 +91,16 @@ pub struct ShardLayoutV0 { /// will be `[[0, 1, 2, 3]]` type ShardSplitMap = Vec>; -#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] +#[derive( + BorshSerialize, + BorshDeserialize, + serde::Serialize, + serde::Deserialize, + Clone, + Debug, + PartialEq, + Eq, +)] pub struct ShardLayoutV1 { /// The boundary accounts are the accounts on boundaries between shards. /// Each shard contains a range of accounts from one boundary account to diff --git a/core/store/src/flat/mod.rs b/core/store/src/flat/mod.rs index d50a2f1f1e1..a44b7c952d5 100644 --- a/core/store/src/flat/mod.rs +++ b/core/store/src/flat/mod.rs @@ -44,7 +44,7 @@ pub use metrics::FlatStorageCreationMetrics; pub use storage::FlatStorage; pub use types::{ BlockInfo, FetchingStateStatus, FlatStateIterator, FlatStorageCreationStatus, FlatStorageError, - FlatStorageReadyStatus, FlatStorageReshardingStatus, FlatStorageStatus, + FlatStorageReadyStatus, FlatStorageReshardingStatus, FlatStorageStatus, SplittingParentStatus, }; pub(crate) const POISONED_LOCK_ERR: &str = "The lock was poisoned."; diff --git a/core/store/src/flat/types.rs b/core/store/src/flat/types.rs index 3a2808f7e31..09a6e91a8dc 100644 --- a/core/store/src/flat/types.rs +++ b/core/store/src/flat/types.rs @@ -1,6 +1,7 @@ use borsh::{BorshDeserialize, BorshSerialize}; use near_primitives::errors::StorageError; use near_primitives::hash::CryptoHash; +use near_primitives::shard_layout::{ShardLayout, ShardUId}; use near_primitives::state::FlatStateValue; use near_primitives::types::{AccountId, BlockHeight}; use near_schema_checker_lib::ProtocolSchema; @@ -148,8 +149,7 @@ pub enum FlatStorageCreationStatus { pub enum FlatStorageReshardingStatus { /// Resharding phase entered when a shard is being split. /// Copy key-value pairs from this shard (the parent) to children shards. - /// Includes the latest AccountID moved from parent to child. - SplittingParent(Option), + SplittingParent(SplittingParentStatus), /// Resharding phase entered when a shard is being split. /// This shard (child) is being built from state taken from its parent. CreatingChild, @@ -181,5 +181,20 @@ pub struct FetchingStateStatus { pub num_parts: u64, } +/// Current step of resharding flat storage - splitting parent. 
+#[derive( + BorshSerialize, BorshDeserialize, Clone, Debug, PartialEq, Eq, serde::Serialize, ProtocolSchema, +)] +pub struct SplittingParentStatus { + /// UId of the left child shard. Will contain everything lesser than boundary account. + pub left_child_shard: ShardUId, + /// UId of the right child shard. Will contain everything greater or equal than boundary account. + pub right_child_shard: ShardUId, + /// The new shard layout. + pub shard_layout: ShardLayout, + /// The latest AccountID moved from parent to child. + pub latest_account_moved: Option, +} + pub type FlatStateIterator<'a> = Box, FlatStateValue)>> + 'a>; From d6823d3227bf3f663dac8ef582311f2e396e24db Mon Sep 17 00:00:00 2001 From: Trisfald Date: Wed, 25 Sep 2024 11:41:30 +0200 Subject: [PATCH 07/36] many improvements --- chain/chain/src/flat_storage_resharder.rs | 199 ++++++++++++++-------- core/store/src/flat/types.rs | 7 +- 2 files changed, 134 insertions(+), 72 deletions(-) diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index dfee4a10996..93d1f491a27 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -28,15 +28,22 @@ use crate::types::RuntimeAdapter; /// It'll be necessary to perform catchup before the flat storage can be put again in Ready state. /// The parent shard storage is not needed anymore and can be removed. pub struct FlatStorageResharder { + inner: FlatStorageResharderInner, +} + +/// Inner clonable object to make sharing internal state easier. +#[derive(Clone)] +struct FlatStorageResharderInner { runtime: Arc, - resharding_event: Mutex>, + resharding_event: Arc>>, } impl FlatStorageResharder { /// Creates a new `FlatStorageResharder`. pub fn new(runtime: Arc) -> Self { - let resharding_event = Mutex::new(None); - Self { runtime, resharding_event } + let resharding_event = Arc::new(Mutex::new(None)); + let inner = FlatStorageResharderInner { runtime, resharding_event }; + Self { inner } } /// Resumes a resharding event that was in progress. @@ -44,22 +51,30 @@ impl FlatStorageResharder { &self, shard_uid: &ShardUId, status: &FlatStorageReshardingStatus, - ) -> Result<(), StorageError> { + ) -> Result<(), Error> { match status { FlatStorageReshardingStatus::CreatingChild => { // Nothing to do here because the parent will take care of resuming work. } FlatStorageReshardingStatus::SplittingParent(status) => { let parent_shard_uid = shard_uid; - info!(target: "resharding", ?parent_shard_uid, ?status, "resuming flat storage resharding"); + info!(target: "resharding", ?parent_shard_uid, ?status, "resuming flat storage shard split"); self.check_no_resharding_in_progress()?; // On resume flat storage status is already set. + // However, we don't know the current state of children shards, + // so it's better to clean them. + self.clean_children_shards(&status)?; self.split_shard_impl(*parent_shard_uid, &status); } FlatStorageReshardingStatus::CatchingUp(_) => { - // TODO(Trisfald): implement catch up + info!(target: "resharding", ?shard_uid, ?status, "resuming flat storage shard catchup"); + // TODO(Trisfald): implement child catch up todo!() } + FlatStorageReshardingStatus::ToBeDeleted => { + // Parent shard's content has been previously copied to the children. + // Nothing else to do. + } } Ok(()) } @@ -71,8 +86,8 @@ impl FlatStorageResharder { &self, shard_layout: &ShardLayout, ) -> Result<(), Error> { - match Self::event_params_from_shard_layout(&shard_layout)? 
{ - ReshardingEventParams::Split(parent_shard, left_child_shard, right_child_shard) => { + match event_type_from_shard_layout(&shard_layout)? { + ReshardingEventType::Split(parent_shard, left_child_shard, right_child_shard) => { self.split_shard(parent_shard, left_child_shard, right_child_shard, shard_layout) } } @@ -90,13 +105,11 @@ impl FlatStorageResharder { self.check_no_resharding_in_progress()?; // Change parent and children shards flat storage status. - // TODO(trisfald): add metrics - let mut store_update = self.runtime.store().store_update(); + let mut store_update = self.inner.runtime.store().store_update(); let status = SplittingParentStatus { left_child_shard, right_child_shard, shard_layout: shard_layout.clone(), - latest_account_moved: None, }; store_helper::set_flat_storage_status( &mut store_update, @@ -121,9 +134,10 @@ impl FlatStorageResharder { Ok(()) } + /// Returns an error if a resharding event is in progress. fn check_no_resharding_in_progress(&self) -> Result<(), StorageError> { // Do not allow multiple resharding events in parallel. - if self.resharding_event.lock().unwrap().is_some() { + if self.resharding_event().is_some() { error!(target: "resharding", "trying to start a new flat storage resharding event while one is already in progress!"); Err(StorageError::FlatStorageReshardingAlreadyInProgress) } else { @@ -131,58 +145,91 @@ impl FlatStorageResharder { } } - fn set_resharding_event(&self, event: ReshardingEvent) { - *self.resharding_event.lock().unwrap() = Some(event); + fn set_resharding_event(&self, event: FlatStorageReshardingEvent) { + *self.inner.resharding_event.lock().unwrap() = Some(event); } /// Returns the current in-progress resharding event, if any. - pub fn resharding_event(&self) -> Option { - self.resharding_event.lock().unwrap().clone() + pub fn resharding_event(&self) -> Option { + self.inner.resharding_event.lock().unwrap().clone() } fn split_shard_impl(&self, parent_shard: ShardUId, status: &SplittingParentStatus) { - let event = ReshardingEvent::Split(parent_shard, status.clone()); + let event = FlatStorageReshardingEvent::Split(parent_shard, status.clone()); self.set_resharding_event(event); debug!(target: "resharding", ?parent_shard, "starting flat storage split: copy of key-value pairs"); - // TODO(Trisfald): implement copy of keys from parent to children + // TODO(Trisfald): start `split_shard_task` } - fn event_params_from_shard_layout( - shard_layout: &ShardLayout, - ) -> Result { - // Resharder supports shard layout V2 onwards. - match shard_layout { - ShardLayout::V0(_) | ShardLayout::V1(_) => { - error!(target: "resharding", ?shard_layout, "unsupported shard layout!"); - return Err(Error::Other( - "flat storage resharding: unsupported shard layout".to_string(), - )); - } + /// Cleans up children shards flat storage's content (status is excluded). + fn clean_children_shards(&self, status: &SplittingParentStatus) -> Result<(), Error> { + let mut store_update = self.inner.runtime.store().store_update(); + for child in [status.left_child_shard, status.right_child_shard] { + store_helper::remove_all_deltas(&mut store_update, child); + store_helper::remove_all_flat_state_values(&mut store_update, child); } + store_update.commit()?; + Ok(()) + } +} + +/// Takes as input a [ShardLayout] definition and deduces which kind of resharding operation must be +/// performed. +/// +/// Returns an error if there isn't any change in the shard layout that would require resharding. 
+fn event_type_from_shard_layout(shard_layout: &ShardLayout) -> Result { + // Resharding V3 supports shard layout V2 onwards. + match shard_layout { + ShardLayout::V0(_) | ShardLayout::V1(_) => { + error!(target: "resharding", ?shard_layout, "unsupported shard layout!"); + return Err(Error::Other("resharding: unsupported shard layout".to_string())); + } + } - // Look for a shard having exactly two children, to trigger a split. - for shard in shard_layout.shard_ids() { - if let Ok(parent) = shard_layout.get_parent_shard_id(shard) { - if let Some(children) = shard_layout.get_children_shards_uids(parent) { - if children.len() == 2 { - return Ok(ReshardingEventParams::Split( - ShardUId::from_shard_id_and_layout(parent, &shard_layout), - children[0], - children[1], - )); - } + let event = None; + // Look for a shard having exactly two children, to trigger a split. + for shard in shard_layout.shard_ids() { + let parent = shard_layout.get_parent_shard_id(shard)?; + if let Some(children) = shard_layout.get_children_shards_uids(parent) { + if children.len() == 2 { + if event.is_none() { + event = Some(ReshardingEventType::Split( + ShardUId::from_shard_id_and_layout(parent, &shard_layout), + children[0], + children[1], + )) + } else { + error!(target: "resharding", ?shard_layout, "two reshards can't be performed at the same time!"); + return Err(Error::Other( + "resharding: new shard layout requires two reshards".to_string(), + )); } } } - error!(target: "resharding", ?shard_layout, "no supported shard layout change found!"); - Err(Error::Other("flat storage resharding: shard layout doesn't contain any supported shard layout change".to_string())) } + event.ok_or_else(|| { + error!(target: "resharding", ?shard_layout, "no supported shard layout change found!"); + Error::Other( + "resharding: shard layout doesn't contain any supported shard layout change" + .to_string(), + ) + }) +} + +/// Task to perform the actual split of a flat storage shard. This may be a long operation time-wise. +/// +/// Conceptually it simply copies each key-value pair from the parent shard to the correct child. +#[allow(unused)] // TODO(Trisfald): remove annotation +fn split_shard_task(_resharder: FlatStorageResharderInner) { + // TODO(Trisfald): implement logic + // store_helper::iter_flat_state_entries + todo!() } /// Struct used to destructure a new shard layout definition into the resulting resharding event. #[cfg_attr(test, derive(PartialEq, Eq))] -enum ReshardingEventParams { +enum ReshardingEventType { /// Split a shard. /// Includes: `parent_shard`, `left_child_shard` and `right_child_shard`. Split(ShardUId, ShardUId, ShardUId), @@ -190,7 +237,7 @@ enum ReshardingEventParams { /// Struct to describe, perform and track progress of a flat storage resharding. #[derive(Clone, Debug)] -pub enum ReshardingEvent { +pub enum FlatStorageReshardingEvent { /// Split a shard. /// Includes the parent shard uid and the operation' status. Split(ShardUId, SplittingParentStatus), @@ -244,38 +291,37 @@ mod tests { let epoch_manager = EpochManager::new_arc_handle(store.clone(), &genesis.config); let runtime = NightshadeRuntime::test( tempdir.path(), - store.clone(), + store, &genesis.config, - epoch_manager.clone(), + epoch_manager, ); FlatStorageResharder::new(runtime) } /// Verify that the correct type of resharding is deduced from a new shard layout. #[test] - fn parse_event_params_from_shard_layout() { - // No resharding specified. 
- assert!( - FlatStorageResharder::event_params_from_shard_layout(&simple_shard_layout()).is_err() - ); + fn parse_event_type_from_shard_layout() { + // No resharding is not ok. + assert!(event_type_from_shard_layout(&simple_shard_layout()).is_err()); // Shard layouts V0 and V1 are rejected. - assert!(FlatStorageResharder::event_params_from_shard_layout( - &ShardLayout::v0_single_shard() - ) - .is_err()); - assert!( - FlatStorageResharder::event_params_from_shard_layout(&ShardLayout::v1_test()).is_err() - ); + assert!(event_type_from_shard_layout(&ShardLayout::v0_single_shard()).is_err()); + assert!(event_type_from_shard_layout(&ShardLayout::v1_test()).is_err()); - // Split shard. - { - let layout = shard_layout_after_split(); - let _event_params = FlatStorageResharder::event_params_from_shard_layout(&layout); + // Single split shard is ok. + let layout = shard_layout_after_split(); + let _event_type = event_type_from_shard_layout(&layout); + // TODO(Trisfald): it won't work until we have shard layout v2. + // assert_eq!(event_type, ReshardingEventType::Split(...)); - // TODO(Trisfald): it won't work until we have shard layout v2. - // assert_eq!(event_params, ReshardingEventParams::Split(...)); - } + // Double split shard is not ok. + // TODO(Trisfald): use shard layout v2 + let layout = ShardLayout::v1( + vec![account!("ff"), account!("pp"), account!("ss")], + Some(vec![vec![0, 2], vec![1, 2]]), + 3, + ); + assert!(event_type_from_shard_layout(&layout).is_err()); } /// Verify that another resharding can't be triggered if one is ongoing. @@ -288,6 +334,7 @@ mod tests { // TODO(Trisfald): it won't work until we have shard layout v2. // assert!(resharder.start_resharding_from_new_shard_layout(&new_shard_layout).is_ok()); + // TODO(Trisfald): find a way to make sure first resharding doesn't finish immediately // assert!(resharder.resharding_event.lock().unwrap().is_some()); // assert!(resharder.start_resharding_from_new_shard_layout(&new_shard_layout).is_err()); } @@ -305,23 +352,23 @@ mod tests { let resharding_event = resharder.resharding_event(); match resharding_event.unwrap() { - ReshardingEvent::Split(parent, status) => { + FlatStorageReshardingEvent::Split(parent, status) => { assert_eq!( - store_helper::get_flat_storage_status(resharder.runtime.store(), parent), + store_helper::get_flat_storage_status(resharder.inner.runtime.store(), parent), Ok(FlatStorageStatus::Resharding( FlatStorageReshardingStatus::SplittingParent(status.clone()) )) ); assert_eq!( store_helper::get_flat_storage_status( - resharder.runtime.store(), + resharder.inner.runtime.store(), status.left_child_shard ), Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CreatingChild)) ); assert_eq!( store_helper::get_flat_storage_status( - resharder.runtime.store(), + resharder.inner.runtime.store(), status.right_child_shard ), Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CreatingChild)) @@ -329,4 +376,18 @@ mod tests { } } } + + /// In this test we write some dirty state into children shards and then try to resume a shard split. + /// Verify that the dirty writes are cleaned up correctly. + #[test] + fn resume_split_starts_from_clean_state() { + todo!() + // Write some random keys in children shards. + + // Set parent state to ShardSplitting. + + // Resume resharding. + + // Children should not contain the random keys written before. 
+ } } diff --git a/core/store/src/flat/types.rs b/core/store/src/flat/types.rs index 09a6e91a8dc..1f38b9011df 100644 --- a/core/store/src/flat/types.rs +++ b/core/store/src/flat/types.rs @@ -3,7 +3,7 @@ use near_primitives::errors::StorageError; use near_primitives::hash::CryptoHash; use near_primitives::shard_layout::{ShardLayout, ShardUId}; use near_primitives::state::FlatStateValue; -use near_primitives::types::{AccountId, BlockHeight}; +use near_primitives::types::BlockHeight; use near_schema_checker_lib::ProtocolSchema; #[derive(BorshSerialize, BorshDeserialize, Debug, Copy, Clone, PartialEq, Eq, serde::Serialize)] @@ -89,6 +89,7 @@ impl Into for &FlatStorageStatus { FlatStorageReshardingStatus::SplittingParent(_) => 20, FlatStorageReshardingStatus::CreatingChild => 21, FlatStorageReshardingStatus::CatchingUp(_) => 22, + FlatStorageReshardingStatus::ToBeDeleted => 23, }, } } @@ -156,6 +157,8 @@ pub enum FlatStorageReshardingStatus { /// We apply deltas from disk until the head reaches final head. /// Includes block hash of flat storage head. CatchingUp(CryptoHash), + /// The shard does no longer exist and its content should be deleted. + ToBeDeleted, } /// Current step of fetching state to fill flat storage. @@ -192,8 +195,6 @@ pub struct SplittingParentStatus { pub right_child_shard: ShardUId, /// The new shard layout. pub shard_layout: ShardLayout, - /// The latest AccountID moved from parent to child. - pub latest_account_moved: Option, } pub type FlatStateIterator<'a> = From 01959079d0bb4b4f38a576a6d3456dcf1a92cafb Mon Sep 17 00:00:00 2001 From: Trisfald Date: Thu, 26 Sep 2024 11:09:02 +0200 Subject: [PATCH 08/36] add test --- chain/chain/src/flat_storage_creator.rs | 2 +- chain/chain/src/flat_storage_resharder.rs | 96 ++++++++++++++++++++--- 2 files changed, 84 insertions(+), 14 deletions(-) diff --git a/chain/chain/src/flat_storage_creator.rs b/chain/chain/src/flat_storage_creator.rs index 080b3d7d39f..da0977a53cd 100644 --- a/chain/chain/src/flat_storage_creator.rs +++ b/chain/chain/src/flat_storage_creator.rs @@ -511,7 +511,7 @@ impl FlatStorageCreator { } FlatStorageStatus::Disabled => {} FlatStorageStatus::Resharding(status) => { - flat_storage_resharder.resume(&shard_uid, &status)?; + flat_storage_resharder.resume(shard_uid, &status)?; } } } diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index 93d1f491a27..0749bc00651 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -49,7 +49,7 @@ impl FlatStorageResharder { /// Resumes a resharding event that was in progress. pub fn resume( &self, - shard_uid: &ShardUId, + shard_uid: ShardUId, status: &FlatStorageReshardingStatus, ) -> Result<(), Error> { match status { @@ -64,7 +64,7 @@ impl FlatStorageResharder { // However, we don't know the current state of children shards, // so it's better to clean them. 
self.clean_children_shards(&status)?; - self.split_shard_impl(*parent_shard_uid, &status); + self.split_shard_impl(parent_shard_uid, &status); } FlatStorageReshardingStatus::CatchingUp(_) => { info!(target: "resharding", ?shard_uid, ?status, "resuming flat storage shard catchup"); @@ -249,7 +249,7 @@ mod tests { use near_chain_configs::Genesis; use near_epoch_manager::EpochManager; use near_o11y::testonly::init_test_logger; - use near_primitives::shard_layout::ShardLayout; + use near_primitives::{shard_layout::ShardLayout, state::FlatStateValue, types::AccountId}; use near_store::{genesis::initialize_genesis_state, test_utils::create_test_store}; use crate::runtime::NightshadeRuntime; @@ -259,7 +259,7 @@ mod tests { /// Shorthand to create account ID. macro_rules! account { ($str:expr) => { - $str.parse().unwrap() + $str.parse::().unwrap() }; } @@ -289,12 +289,8 @@ mod tests { let store = create_test_store(); initialize_genesis_state(store.clone(), &genesis, Some(tempdir.path())); let epoch_manager = EpochManager::new_arc_handle(store.clone(), &genesis.config); - let runtime = NightshadeRuntime::test( - tempdir.path(), - store, - &genesis.config, - epoch_manager, - ); + let runtime = + NightshadeRuntime::test(tempdir.path(), store, &genesis.config, epoch_manager); FlatStorageResharder::new(runtime) } @@ -381,13 +377,87 @@ mod tests { /// Verify that the dirty writes are cleaned up correctly. #[test] fn resume_split_starts_from_clean_state() { - todo!() - // Write some random keys in children shards. + init_test_logger(); + let resharder = create_fs_resharder(simple_shard_layout()); + let store = resharder.inner.runtime.store(); + let shard_layout = shard_layout_after_split(); + let resharding_type = event_type_from_shard_layout(&shard_layout).unwrap(); + let (parent, left_child_shard, right_child_shard) = match resharding_type { + ReshardingEventType::Split(parent, left_child, right_child) => { + (parent, left_child, right_child) + } + }; + + let mut store_update = store.store_update(); + + // Write some random key-values in children shards. + let dirty_key: Vec = vec![1, 2, 3, 4]; + let dirty_value = Some(FlatStateValue::Inlined(dirty_key.clone())); + for child_shard in [left_child_shard, right_child_shard] { + store_helper::set_flat_state_value( + &mut store_update, + child_shard, + dirty_key.clone(), + dirty_value.clone(), + ); + } + + // Set parent state to ShardSplitting, manually, to simulate a forcibly interrupted resharding attempt. + let resharding_status = + FlatStorageReshardingStatus::SplittingParent(SplittingParentStatus { + left_child_shard, + right_child_shard, + shard_layout, + }); + store_helper::set_flat_storage_status( + &mut store_update, + parent, + FlatStorageStatus::Resharding(resharding_status.clone()), + ); - // Set parent state to ShardSplitting. + store_update.commit().unwrap(); // Resume resharding. + resharder.resume(parent, &resharding_status).unwrap(); // Children should not contain the random keys written before. + for child_shard in [left_child_shard, right_child_shard] { + assert_eq!( + store_helper::get_flat_state_value(&store, child_shard, &dirty_key), + Ok(None) + ); + } + } + + /// Tests a simple split shard scenario. + /// + /// Old layout: + /// shard 0 -> accounts [aa] + /// shard 1 -> accounts [mm, vv] + /// + /// New layout: + /// shard 0 -> accounts [aa] + /// shard 2 -> accounts [mm] + /// shard 3 -> accounts [vv] + /// + /// Shard to split is shard 1. + #[test] + fn simple_split_shard() { + init_test_logger(); + // Perform resharding. 
+ let resharder = create_fs_resharder(simple_shard_layout()); + let new_shard_layout = shard_layout_after_split(); + assert!(resharder.start_resharding_from_new_shard_layout(&new_shard_layout).is_ok()); + + // Check flat storages of children contain the correct accounts. + let left_child = ShardUId::from_shard_id_and_layout(2, &new_shard_layout); + let right_child = ShardUId::from_shard_id_and_layout(3, &new_shard_layout); + let store = resharder.inner.runtime.store(); + let account_mm = account!("mm"); + let account_vv = account!("vv"); + assert!(store_helper::get_flat_state_value(&store, left_child, account_mm.as_bytes()) + .is_ok_and(|val| val.is_some())); + assert!(store_helper::get_flat_state_value(&store, right_child, account_vv.as_bytes()) + .is_ok_and(|val| val.is_some())); } } From c7a687547b7a2bde834571ef260ee30b716a874e Mon Sep 17 00:00:00 2001 From: Trisfald Date: Thu, 26 Sep 2024 22:10:14 +0200 Subject: [PATCH 09/36] improve resharder api --- chain/chain/src/flat_storage_creator.rs | 7 +- chain/chain/src/flat_storage_resharder.rs | 300 ++++++++++++++++++---- 2 files changed, 249 insertions(+), 58 deletions(-) diff --git a/chain/chain/src/flat_storage_creator.rs b/chain/chain/src/flat_storage_creator.rs index da0977a53cd..1fdc29b0ed0 100644 --- a/chain/chain/src/flat_storage_creator.rs +++ b/chain/chain/src/flat_storage_creator.rs @@ -483,7 +483,7 @@ impl FlatStorageCreator { epoch_manager: &Arc, flat_storage_manager: &FlatStorageManager, runtime: &Arc, - flat_storage_resharder: &FlatStorageResharder, + _flat_storage_resharder: &FlatStorageResharder, ) -> Result, Error> { let epoch_id = &chain_head.epoch_id; tracing::debug!(target: "store", ?epoch_id, "creating flat storage for the current epoch"); @@ -510,8 +510,9 @@ impl FlatStorageCreator { ); } FlatStorageStatus::Disabled => {} - FlatStorageStatus::Resharding(status) => { - flat_storage_resharder.resume(shard_uid, &status)?; + FlatStorageStatus::Resharding(_status) => { + // TODO(Trisfald): call resume + // flat_storage_resharder.resume(shard_uid, &status, ...)?; } } } diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index 0749bc00651..47b4ceacf16 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -2,15 +2,16 @@ //! //! See [FlatStorageResharder] for more details about how the resharding takes place. -use std::sync::{Arc, Mutex}; +use std::sync::{atomic::AtomicBool, Arc, Mutex}; +use crossbeam_channel::{Receiver, Sender}; use near_chain_primitives::Error; -use near_primitives::shard_layout::ShardLayout; +use near_primitives::{shard_layout::ShardLayout, state::FlatStateValue}; use near_store::{ flat::{store_helper, FlatStorageReshardingStatus, FlatStorageStatus, SplittingParentStatus}, - ShardUId, StorageError, + ShardUId, StorageError, StoreUpdate, }; -use tracing::{debug, error, info}; +use tracing::{debug, error, info, warn}; use crate::types::RuntimeAdapter; @@ -27,6 +28,10 @@ use crate::types::RuntimeAdapter; /// After the copy is finished the children shard will have the correct state at some past block height. /// It'll be necessary to perform catchup before the flat storage can be put again in Ready state. /// The parent shard storage is not needed anymore and can be removed. 
+/// +/// The resharder has also the following properties: +/// - Background processing: the bulk of resharding is done in a separate task +/// - Interruptible: a reshard can be interrupted pub struct FlatStorageResharder { inner: FlatStorageResharderInner, } @@ -47,11 +52,14 @@ impl FlatStorageResharder { } /// Resumes a resharding event that was in progress. + /// + /// Returns an optional controller to manage the execution of background tasks. pub fn resume( &self, shard_uid: ShardUId, status: &FlatStorageReshardingStatus, - ) -> Result<(), Error> { + scheduler: &dyn FlatStorageResharderScheduler, + ) -> Result>, Error> { match status { FlatStorageReshardingStatus::CreatingChild => { // Nothing to do here because the parent will take care of resuming work. @@ -64,7 +72,7 @@ impl FlatStorageResharder { // However, we don't know the current state of children shards, // so it's better to clean them. self.clean_children_shards(&status)?; - self.split_shard_impl(parent_shard_uid, &status); + return Ok(Some(self.schedule_split_shard(parent_shard_uid, &status, scheduler))); } FlatStorageReshardingStatus::CatchingUp(_) => { info!(target: "resharding", ?shard_uid, ?status, "resuming flat storage shard catchup"); @@ -76,31 +84,37 @@ impl FlatStorageResharder { // Nothing else to do. } } - Ok(()) + Ok(None) } /// Starts a resharding event deduced from the new shard layout provided. /// /// For now, only splitting a shard is supported. + /// + /// # Args: + /// * `shard_layout`: the new shard layout, it must contain a layout change or an error is returned + /// * `scheduler`: component used to schedule the background tasks + /// + /// Returns a controller to manage the execution of the background task. pub fn start_resharding_from_new_shard_layout( &self, shard_layout: &ShardLayout, - ) -> Result<(), Error> { + scheduler: &dyn FlatStorageResharderScheduler, + ) -> Result, Error> { match event_type_from_shard_layout(&shard_layout)? { - ReshardingEventType::Split(parent_shard, left_child_shard, right_child_shard) => { - self.split_shard(parent_shard, left_child_shard, right_child_shard, shard_layout) - } + ReshardingEventType::Split(params) => self.split_shard(params, shard_layout, scheduler), } } /// Starts the event of splitting a parent shard flat storage into two children. - pub fn split_shard( + fn split_shard( &self, - parent_shard: ShardUId, - left_child_shard: ShardUId, - right_child_shard: ShardUId, + split_params: ReshardingSplitParams, shard_layout: &ShardLayout, - ) -> Result<(), Error> { + scheduler: &dyn FlatStorageResharderScheduler, + ) -> Result, Error> { + let ReshardingSplitParams { parent_shard, left_child_shard, right_child_shard } = + split_params; info!(target: "resharding", ?parent_shard, ?left_child_shard, ?right_child_shard, "initiating flat storage split"); self.check_no_resharding_in_progress()?; @@ -130,8 +144,7 @@ impl FlatStorageResharder { ); store_update.commit()?; - self.split_shard_impl(parent_shard, &status); - Ok(()) + Ok(self.schedule_split_shard(parent_shard, &status, scheduler)) } /// Returns an error if a resharding event is in progress. @@ -154,26 +167,53 @@ impl FlatStorageResharder { self.inner.resharding_event.lock().unwrap().clone() } - fn split_shard_impl(&self, parent_shard: ShardUId, status: &SplittingParentStatus) { + /// Schedules a task to split a shard. 
+ fn schedule_split_shard( + &self, + parent_shard: ShardUId, + status: &SplittingParentStatus, + scheduler: &dyn FlatStorageResharderScheduler, + ) -> Arc { let event = FlatStorageReshardingEvent::Split(parent_shard, status.clone()); self.set_resharding_event(event); - debug!(target: "resharding", ?parent_shard, "starting flat storage split: copy of key-value pairs"); - - // TODO(Trisfald): start `split_shard_task` + debug!(target: "resharding", ?parent_shard, "scheduling flat storage split: copy of key-value pairs"); + + let resharder = self.inner.clone(); + let controller = Arc::new(FlatStorageResharderController::new()); + let controller_clone = controller.clone(); + let task = Box::new(move || split_shard_task(resharder, controller_clone)); + scheduler.schedule(task); + controller } /// Cleans up children shards flat storage's content (status is excluded). fn clean_children_shards(&self, status: &SplittingParentStatus) -> Result<(), Error> { + let SplittingParentStatus { left_child_shard, right_child_shard, .. } = status; + debug!(target: "resharding", ?left_child_shard, ?right_child_shard, "cleaning up children shards flat storage's content"); let mut store_update = self.inner.runtime.store().store_update(); - for child in [status.left_child_shard, status.right_child_shard] { - store_helper::remove_all_deltas(&mut store_update, child); - store_helper::remove_all_flat_state_values(&mut store_update, child); + for child in [left_child_shard, right_child_shard] { + store_helper::remove_all_deltas(&mut store_update, *child); + store_helper::remove_all_flat_state_values(&mut store_update, *child); } store_update.commit()?; Ok(()) } } +/// Struct used to destructure a new shard layout definition into the resulting resharding event. +#[cfg_attr(test, derive(PartialEq, Eq))] +enum ReshardingEventType { + /// Split of a shard. + Split(ReshardingSplitParams), +} + +#[cfg_attr(test, derive(PartialEq, Eq))] +struct ReshardingSplitParams { + parent_shard: ShardUId, + left_child_shard: ShardUId, + right_child_shard: ShardUId, +} + /// Takes as input a [ShardLayout] definition and deduces which kind of resharding operation must be /// performed. /// @@ -194,11 +234,14 @@ fn event_type_from_shard_layout(shard_layout: &ShardLayout) -> Result Result, +) { + let success = split_shard_task_impl(resharder.clone(), controller.clone()); + split_shard_task_postprocessing(resharder, success); + info!(target: "resharding", "flat storage shard split task finished, success: {success}"); + // TODO(Trisfald): change children state and parent and resharding event depending on outcome + if let Err(err) = controller.completion_sender.send(success) { + warn!(target: "resharding", "error notifying completion of flat storage shard split task ({err})") + }; } -/// Struct used to destructure a new shard layout definition into the resulting resharding event. -#[cfg_attr(test, derive(PartialEq, Eq))] -enum ReshardingEventType { - /// Split a shard. - /// Includes: `parent_shard`, `left_child_shard` and `right_child_shard`. - Split(ShardUId, ShardUId, ShardUId), +/// Performs the bulk of [split_shard_task]. +/// +/// Returns `true` if the routine completed successfully. +fn split_shard_task_impl( + resharder: FlatStorageResharderInner, + controller: Arc, +) -> bool { + if controller.is_interrupted() { + return false; + } + + /// Determines after how many key-values the process stops to + /// commit changes and to check interruptions. 
+ const BATCH_SIZE: usize = 10_000; + + // Retrieve shard UIds and current resharding event status. + let (parent_shard, status) = { + let event = resharder.resharding_event.lock().unwrap(); + match event.as_ref() { + Some(FlatStorageReshardingEvent::Split(parent_shard, status)) => { + (*parent_shard, status.clone()) + } + None => panic!("a resharding event must exist!"), + } + }; + let SplittingParentStatus { left_child_shard, right_child_shard, .. } = status; + + // Prepare the store object for commits and the iterator over parent's flat storage. + let store = resharder.runtime.store(); + let mut iter = store_helper::iter_flat_state_entries(parent_shard, store, None, None); + + loop { + let mut store_update = store.store_update(); + + // Process a `BATCH_SIZE` worth of key value pairs. + let mut iter_exhausted = false; + for _ in 0..BATCH_SIZE { + match iter.next() { + Some(Ok(kv)) => { + shard_split_handle_key_value(kv, &mut store_update); + } + Some(Err(err)) => { + error!(target: "resharding", "failed to read flat storage value from parent shard ({err})"); + return false; + } + None => { + iter_exhausted = true; + } + } + } + + // Make a pause to commit and check if the routine should stop. + if let Err(err) = store_update.commit() { + error!(target: "resharding", "failed to commit store update ({err})"); + return false; + } + + // TODO(Trisfald): metrics and logs + + // If `iter`` is exhausted we can exit after the store commit. + if iter_exhausted { + break; + } + if controller.is_interrupted() { + return false; + } + } + true +} + +/// Handles the inheritance of a key-value pair from parent shard to children shards. +fn shard_split_handle_key_value(kv: (Vec, FlatStateValue), store_update: &mut StoreUpdate) { + // TODO(Trisfald): implement +} + +/// Performs post-processing of shard splitting after all key-values have been moved from parent to children. +/// `success` indicates whether or not the previous phase was successful. +fn split_shard_task_postprocessing(resharder: FlatStorageResharderInner, success: bool) { + if success { + // Split shard completed successfully. + // TODO(Trisfald): see what to do + } else { + // We got an error or an interrupt request. + // Remove children shards leftovers and reset parent shard status. + // TODO(Trisfald): implement + } + // Terminate the resharding event. + *resharder.resharding_event.lock().unwrap() = None; } /// Struct to describe, perform and track progress of a flat storage resharding. @@ -243,13 +374,50 @@ pub enum FlatStorageReshardingEvent { Split(ShardUId, SplittingParentStatus), } +/// Helps control the flat storage resharder operation. More specifically, +/// it has a way to know when the background task is done or to interrupt it. +pub struct FlatStorageResharderController { + /// Set this flag to true if the resharding should be interrupted. + interrupt: AtomicBool, + /// This object will be used to signal when the background task is completed. + /// A value of `true` means that the operation completed successfully. + completion_sender: Sender, + /// Corresponding receiver for `completion_sender`. + pub completion_receiver: Receiver, +} + +impl FlatStorageResharderController { + /// Creates a new `FlatStorageResharderController`. + pub fn new() -> Self { + let (completion_sender, completion_receiver) = crossbeam_channel::bounded(1); + Self { interrupt: AtomicBool::new(false), completion_sender, completion_receiver } + } + + /// Interrupts any ongoing task. 
+ pub fn interrupt(&self) { + self.interrupt.store(true, std::sync::atomic::Ordering::SeqCst); + } + + /// Returns whether or not background task is interrupted. + pub fn is_interrupted(&self) -> bool { + self.interrupt.load(std::sync::atomic::Ordering::SeqCst) + } +} + +/// Represent the capability of scheduling the background tasks spawned by flat storage resharding. +pub trait FlatStorageResharderScheduler { + fn schedule(&self, f: Box); +} + #[cfg(test)] mod tests { use near_async::time::Clock; use near_chain_configs::Genesis; use near_epoch_manager::EpochManager; use near_o11y::testonly::init_test_logger; - use near_primitives::{shard_layout::ShardLayout, state::FlatStateValue, types::AccountId}; + use near_primitives::{ + shard_layout::ShardLayout, state::FlatStateValue, trie_key::TrieKey, types::AccountId, + }; use near_store::{genesis::initialize_genesis_state, test_utils::create_test_store}; use crate::runtime::NightshadeRuntime; @@ -263,6 +431,14 @@ mod tests { }; } + struct TestScheduler {} + + impl FlatStorageResharderScheduler for TestScheduler { + fn schedule(&self, f: Box) { + f(); + } + } + /// Simple shard layout with two shards. fn simple_shard_layout() -> ShardLayout { // TODO(Trisfald): use shard layout v2 @@ -330,7 +506,10 @@ mod tests { // TODO(Trisfald): it won't work until we have shard layout v2. // assert!(resharder.start_resharding_from_new_shard_layout(&new_shard_layout).is_ok()); - // TODO(Trisfald): find a way to make sure first resharding doesn't finish immediately + + // Immediately interrupt the resharding. + // TODO + // assert!(resharder.resharding_event.lock().unwrap().is_some()); // assert!(resharder.start_resharding_from_new_shard_layout(&new_shard_layout).is_err()); } @@ -382,11 +561,10 @@ mod tests { let store = resharder.inner.runtime.store(); let shard_layout = shard_layout_after_split(); let resharding_type = event_type_from_shard_layout(&shard_layout).unwrap(); - let (parent, left_child_shard, right_child_shard) = match resharding_type { - ReshardingEventType::Split(parent, left_child, right_child) => { - (parent, left_child, right_child) - } - }; + let ReshardingSplitParams { parent_shard, left_child_shard, right_child_shard } = + match resharding_type { + ReshardingEventType::Split(params) => params, + }; let mut store_update = store.store_update(); @@ -411,14 +589,15 @@ mod tests { }); store_helper::set_flat_storage_status( &mut store_update, - parent, + parent_shard, FlatStorageStatus::Resharding(resharding_status.clone()), ); store_update.commit().unwrap(); // Resume resharding. - resharder.resume(parent, &resharding_status).unwrap(); + let scheduler = TestScheduler {}; + let _ = resharder.resume(parent_shard, &resharding_status, &scheduler).unwrap(); // Children should not contain the random keys written before. for child_shard in [left_child_shard, right_child_shard] { @@ -447,17 +626,28 @@ mod tests { // Perform resharding. let resharder = create_fs_resharder(simple_shard_layout()); let new_shard_layout = shard_layout_after_split(); - assert!(resharder.start_resharding_from_new_shard_layout(&new_shard_layout).is_ok()); + let scheduler = TestScheduler {}; + + let result = + resharder.start_resharding_from_new_shard_layout(&new_shard_layout, &scheduler); + assert!(result.is_ok()); + let controller = result.unwrap(); // Check flat storages of children contain the correct accounts. 
let left_child = ShardUId::from_shard_id_and_layout(2, &new_shard_layout); let right_child = ShardUId::from_shard_id_and_layout(3, &new_shard_layout); let store = resharder.inner.runtime.store(); - let account_mm = account!("mm"); - let account_vv = account!("vv"); - assert!(store_helper::get_flat_state_value(&store, left_child, account_mm.as_bytes()) + let account_mm_key = TrieKey::Account { account_id: account!("mm") }; + let account_vv_key = TrieKey::Account { account_id: account!("vv") }; + assert!(store_helper::get_flat_state_value(&store, left_child, &account_mm_key.to_vec()) .is_ok_and(|val| val.is_some())); - assert!(store_helper::get_flat_state_value(&store, right_child, account_vv.as_bytes()) + assert!(store_helper::get_flat_state_value(&store, right_child, &account_vv_key.to_vec()) .is_ok_and(|val| val.is_some())); + + // Check status of children and parent flat storages. + todo!(); + + // Controller should signal that resharding ended. + todo!(); } } From 79730a42676e597a95a2944aa233a9730063115b Mon Sep 17 00:00:00 2001 From: Trisfald Date: Fri, 27 Sep 2024 11:07:20 +0200 Subject: [PATCH 10/36] use the new flat store adapter --- chain/chain/src/flat_storage_resharder.rs | 74 ++++++++++------------- 1 file changed, 31 insertions(+), 43 deletions(-) diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index 47b4ceacf16..8e8569b3c35 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -8,8 +8,9 @@ use crossbeam_channel::{Receiver, Sender}; use near_chain_primitives::Error; use near_primitives::{shard_layout::ShardLayout, state::FlatStateValue}; use near_store::{ - flat::{store_helper, FlatStorageReshardingStatus, FlatStorageStatus, SplittingParentStatus}, - ShardUId, StorageError, StoreUpdate, + adapter::{flat_store::FlatStoreUpdateAdapter, StoreAdapter}, + flat::{FlatStorageReshardingStatus, FlatStorageStatus, SplittingParentStatus}, + ShardUId, StorageError }; use tracing::{debug, error, info, warn}; @@ -119,26 +120,23 @@ impl FlatStorageResharder { self.check_no_resharding_in_progress()?; // Change parent and children shards flat storage status. - let mut store_update = self.inner.runtime.store().store_update(); + let mut store_update = self.inner.runtime.store().flat_store().store_update(); let status = SplittingParentStatus { left_child_shard, right_child_shard, shard_layout: shard_layout.clone(), }; - store_helper::set_flat_storage_status( - &mut store_update, + store_update.set_flat_storage_status( parent_shard, FlatStorageStatus::Resharding(FlatStorageReshardingStatus::SplittingParent( status.clone(), )), ); - store_helper::set_flat_storage_status( - &mut store_update, + store_update.set_flat_storage_status( left_child_shard, FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CreatingChild), ); - store_helper::set_flat_storage_status( - &mut store_update, + store_update.set_flat_storage_status( right_child_shard, FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CreatingChild), ); @@ -190,10 +188,10 @@ impl FlatStorageResharder { fn clean_children_shards(&self, status: &SplittingParentStatus) -> Result<(), Error> { let SplittingParentStatus { left_child_shard, right_child_shard, .. 
} = status; debug!(target: "resharding", ?left_child_shard, ?right_child_shard, "cleaning up children shards flat storage's content"); - let mut store_update = self.inner.runtime.store().store_update(); + let mut store_update = self.inner.runtime.store().flat_store().store_update(); for child in [left_child_shard, right_child_shard] { - store_helper::remove_all_deltas(&mut store_update, *child); - store_helper::remove_all_flat_state_values(&mut store_update, *child); + store_update.remove_all_deltas(*child); + store_update.remove_all(*child); } store_update.commit()?; Ok(()) @@ -270,7 +268,6 @@ fn split_shard_task( let success = split_shard_task_impl(resharder.clone(), controller.clone()); split_shard_task_postprocessing(resharder, success); info!(target: "resharding", "flat storage shard split task finished, success: {success}"); - // TODO(Trisfald): change children state and parent and resharding event depending on outcome if let Err(err) = controller.completion_sender.send(success) { warn!(target: "resharding", "error notifying completion of flat storage shard split task ({err})") }; @@ -304,11 +301,11 @@ fn split_shard_task_impl( let SplittingParentStatus { left_child_shard, right_child_shard, .. } = status; // Prepare the store object for commits and the iterator over parent's flat storage. - let store = resharder.runtime.store(); - let mut iter = store_helper::iter_flat_state_entries(parent_shard, store, None, None); + let flat_store = resharder.runtime.store().flat_store(); + let mut iter = flat_store.iter(parent_shard); loop { - let mut store_update = store.store_update(); + let mut store_update = flat_store.store_update(); // Process a `BATCH_SIZE` worth of key value pairs. let mut iter_exhausted = false; @@ -347,7 +344,10 @@ fn split_shard_task_impl( } /// Handles the inheritance of a key-value pair from parent shard to children shards. -fn shard_split_handle_key_value(kv: (Vec, FlatStateValue), store_update: &mut StoreUpdate) { +fn shard_split_handle_key_value( + kv: (Vec, FlatStateValue), + store_update: &mut FlatStoreUpdateAdapter, +) { // TODO(Trisfald): implement } @@ -520,6 +520,7 @@ mod tests { init_test_logger(); let resharder = create_fs_resharder(simple_shard_layout()); let _new_shard_layout = shard_layout_after_split(); + let mut flat_store = resharder.inner.runtime.store().flat_store(); // TODO(Trisfald): it won't work until we have shard layout v2. 
@@ -529,23 +530,17 @@ mod tests { match resharding_event.unwrap() { FlatStorageReshardingEvent::Split(parent, status) => { assert_eq!( - store_helper::get_flat_storage_status(resharder.inner.runtime.store(), parent), + flat_store.get_flat_storage_status(parent), Ok(FlatStorageStatus::Resharding( FlatStorageReshardingStatus::SplittingParent(status.clone()) )) ); assert_eq!( - store_helper::get_flat_storage_status( - resharder.inner.runtime.store(), - status.left_child_shard - ), + flat_store.get_flat_storage_status(status.left_child_shard), Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CreatingChild)) ); assert_eq!( - store_helper::get_flat_storage_status( - resharder.inner.runtime.store(), - status.right_child_shard - ), + flat_store.get_flat_storage_status(status.right_child_shard), Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CreatingChild)) ); } @@ -558,7 +553,7 @@ mod tests { fn resume_split_starts_from_clean_state() { init_test_logger(); let resharder = create_fs_resharder(simple_shard_layout()); - let store = resharder.inner.runtime.store(); + let flat_store = resharder.inner.runtime.store().flat_store(); let shard_layout = shard_layout_after_split(); let resharding_type = event_type_from_shard_layout(&shard_layout).unwrap(); let ReshardingSplitParams { parent_shard, left_child_shard, right_child_shard } = @@ -566,18 +561,13 @@ mod tests { ReshardingEventType::Split(params) => params, }; - let mut store_update = store.store_update(); + let mut store_update = flat_store.store_update(); // Write some random key-values in children shards. let dirty_key: Vec = vec![1, 2, 3, 4]; let dirty_value = Some(FlatStateValue::Inlined(dirty_key.clone())); for child_shard in [left_child_shard, right_child_shard] { - store_helper::set_flat_state_value( - &mut store_update, - child_shard, - dirty_key.clone(), - dirty_value.clone(), - ); + store_update.set(child_shard, dirty_key.clone(), dirty_value.clone()); } // Set parent state to ShardSplitting, manually, to simulate a forcibly interrupted resharding attempt. @@ -587,8 +577,7 @@ mod tests { right_child_shard, shard_layout, }); - store_helper::set_flat_storage_status( - &mut store_update, + store_update.set_flat_storage_status( parent_shard, FlatStorageStatus::Resharding(resharding_status.clone()), ); @@ -601,10 +590,7 @@ mod tests { // Children should not contain the random keys written before. for child_shard in [left_child_shard, right_child_shard] { - assert_eq!( - store_helper::get_flat_state_value(&store, child_shard, &dirty_key), - Ok(None) - ); + assert_eq!(flat_store.get(child_shard, &dirty_key), Ok(None)); } } @@ -636,12 +622,14 @@ mod tests { // Check flat storages of children contain the correct accounts. 
let left_child = ShardUId::from_shard_id_and_layout(2, &new_shard_layout); let right_child = ShardUId::from_shard_id_and_layout(3, &new_shard_layout); - let store = resharder.inner.runtime.store(); + let flat_store = resharder.inner.runtime.store().flat_store(); let account_mm_key = TrieKey::Account { account_id: account!("mm") }; let account_vv_key = TrieKey::Account { account_id: account!("vv") }; - assert!(store_helper::get_flat_state_value(&store, left_child, &account_mm_key.to_vec()) + assert!(flat_store + .get(left_child, &account_mm_key.to_vec()) .is_ok_and(|val| val.is_some())); - assert!(store_helper::get_flat_state_value(&store, right_child, &account_vv_key.to_vec()) + assert!(flat_store + .get(right_child, &account_vv_key.to_vec()) .is_ok_and(|val| val.is_some())); // Check status of children and parent flat storages. From ad83e0b093b208b2ddbb78934c392e1672651401 Mon Sep 17 00:00:00 2001 From: Trisfald Date: Fri, 27 Sep 2024 15:45:56 +0200 Subject: [PATCH 11/36] implement simple key value copy --- chain/chain/src/flat_storage_resharder.rs | 268 +++++++++++++++++----- core/store/src/flat/types.rs | 2 + 2 files changed, 209 insertions(+), 61 deletions(-) diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index 8e8569b3c35..57673b72e08 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -2,15 +2,28 @@ //! //! See [FlatStorageResharder] for more details about how the resharding takes place. -use std::sync::{atomic::AtomicBool, Arc, Mutex}; +use std::sync::{Arc, Mutex}; use crossbeam_channel::{Receiver, Sender}; +use near_chain_configs::ReshardingHandle; use near_chain_primitives::Error; -use near_primitives::{shard_layout::ShardLayout, state::FlatStateValue}; +use near_primitives::{ + hash::CryptoHash, + shard_layout::{account_id_to_shard_id, ShardLayout}, + state::FlatStateValue, + trie_key::{ + col, + trie_key_parsers::{ + parse_account_id_from_access_key_key, parse_account_id_from_account_key, + parse_account_id_from_contract_code_key, + }, + }, + types::AccountId, +}; use near_store::{ adapter::{flat_store::FlatStoreUpdateAdapter, StoreAdapter}, flat::{FlatStorageReshardingStatus, FlatStorageStatus, SplittingParentStatus}, - ShardUId, StorageError + ShardUId, StorageError, }; use tracing::{debug, error, info, warn}; @@ -54,13 +67,18 @@ impl FlatStorageResharder { /// Resumes a resharding event that was in progress. /// - /// Returns an optional controller to manage the execution of background tasks. + /// # Args: + /// * `shard_uid`: UId of the shard + /// * `status`: resharding status of the shard + /// * `scheduler`: component used to schedule the background tasks + /// * `controller`: manages the execution of the background tasks pub fn resume( &self, shard_uid: ShardUId, status: &FlatStorageReshardingStatus, scheduler: &dyn FlatStorageResharderScheduler, - ) -> Result>, Error> { + controller: FlatStorageResharderController, + ) -> Result<(), Error> { match status { FlatStorageReshardingStatus::CreatingChild => { // Nothing to do here because the parent will take care of resuming work. @@ -73,7 +91,7 @@ impl FlatStorageResharder { // However, we don't know the current state of children shards, // so it's better to clean them. 
self.clean_children_shards(&status)?; - return Ok(Some(self.schedule_split_shard(parent_shard_uid, &status, scheduler))); + self.schedule_split_shard(parent_shard_uid, &status, scheduler, controller); } FlatStorageReshardingStatus::CatchingUp(_) => { info!(target: "resharding", ?shard_uid, ?status, "resuming flat storage shard catchup"); @@ -85,7 +103,7 @@ impl FlatStorageResharder { // Nothing else to do. } } - Ok(None) + Ok(()) } /// Starts a resharding event deduced from the new shard layout provided. @@ -94,17 +112,22 @@ impl FlatStorageResharder { /// /// # Args: /// * `shard_layout`: the new shard layout, it must contain a layout change or an error is returned + /// * `block_hash`: block hash of the block in which the split happens /// * `scheduler`: component used to schedule the background tasks - /// - /// Returns a controller to manage the execution of the background task. + /// * `controller`: manages the execution of the background tasks pub fn start_resharding_from_new_shard_layout( &self, shard_layout: &ShardLayout, + block_hash: &CryptoHash, scheduler: &dyn FlatStorageResharderScheduler, - ) -> Result, Error> { + controller: FlatStorageResharderController, + ) -> Result<(), Error> { match event_type_from_shard_layout(&shard_layout)? { - ReshardingEventType::Split(params) => self.split_shard(params, shard_layout, scheduler), - } + ReshardingEventType::Split(params) => { + self.split_shard(params, shard_layout, block_hash, scheduler, controller)? + } + }; + Ok(()) } /// Starts the event of splitting a parent shard flat storage into two children. @@ -112,8 +135,10 @@ impl FlatStorageResharder { &self, split_params: ReshardingSplitParams, shard_layout: &ShardLayout, + block_hash: &CryptoHash, scheduler: &dyn FlatStorageResharderScheduler, - ) -> Result, Error> { + controller: FlatStorageResharderController, + ) -> Result<(), Error> { let ReshardingSplitParams { parent_shard, left_child_shard, right_child_shard } = split_params; info!(target: "resharding", ?parent_shard, ?left_child_shard, ?right_child_shard, "initiating flat storage split"); @@ -125,6 +150,7 @@ impl FlatStorageResharder { left_child_shard, right_child_shard, shard_layout: shard_layout.clone(), + block_hash: *block_hash, }; store_update.set_flat_storage_status( parent_shard, @@ -142,7 +168,8 @@ impl FlatStorageResharder { ); store_update.commit()?; - Ok(self.schedule_split_shard(parent_shard, &status, scheduler)) + self.schedule_split_shard(parent_shard, &status, scheduler, controller); + Ok(()) } /// Returns an error if a resharding event is in progress. @@ -171,17 +198,15 @@ impl FlatStorageResharder { parent_shard: ShardUId, status: &SplittingParentStatus, scheduler: &dyn FlatStorageResharderScheduler, - ) -> Arc { + controller: FlatStorageResharderController, + ) { let event = FlatStorageReshardingEvent::Split(parent_shard, status.clone()); self.set_resharding_event(event); debug!(target: "resharding", ?parent_shard, "scheduling flat storage split: copy of key-value pairs"); let resharder = self.inner.clone(); - let controller = Arc::new(FlatStorageResharderController::new()); - let controller_clone = controller.clone(); - let task = Box::new(move || split_shard_task(resharder, controller_clone)); + let task = Box::new(move || split_shard_task(resharder, controller)); scheduler.schedule(task); - controller } /// Cleans up children shards flat storage's content (status is excluded). 
@@ -207,8 +232,11 @@ enum ReshardingEventType { #[cfg_attr(test, derive(PartialEq, Eq))] struct ReshardingSplitParams { + // Shard being split. parent_shard: ShardUId, + // Child to the left of the account boundary. left_child_shard: ShardUId, + // Child to the right of the account boundary. right_child_shard: ShardUId, } @@ -263,22 +291,36 @@ fn event_type_from_shard_layout(shard_layout: &ShardLayout) -> Result, + controller: FlatStorageResharderController, ) { let success = split_shard_task_impl(resharder.clone(), controller.clone()); split_shard_task_postprocessing(resharder, success); info!(target: "resharding", "flat storage shard split task finished, success: {success}"); if let Err(err) = controller.completion_sender.send(success) { - warn!(target: "resharding", "error notifying completion of flat storage shard split task ({err})") + warn!(target: "resharding", ?err, "error notifying completion of flat storage shard split task") }; } +/// Retrieve parent shard UIds and current resharding event status. +/// Resharding event must be of type "Split". +fn get_parent_shard_and_status( + resharder: &FlatStorageResharderInner, +) -> (ShardUId, SplittingParentStatus) { + let event = resharder.resharding_event.lock().unwrap(); + match event.as_ref() { + Some(FlatStorageReshardingEvent::Split(parent_shard, status)) => { + (*parent_shard, status.clone()) + } + None => panic!("a resharding event must exist!"), + } +} + /// Performs the bulk of [split_shard_task]. /// /// Returns `true` if the routine completed successfully. fn split_shard_task_impl( resharder: FlatStorageResharderInner, - controller: Arc, + controller: FlatStorageResharderController, ) -> bool { if controller.is_interrupted() { return false; @@ -288,17 +330,7 @@ fn split_shard_task_impl( /// commit changes and to check interruptions. const BATCH_SIZE: usize = 10_000; - // Retrieve shard UIds and current resharding event status. - let (parent_shard, status) = { - let event = resharder.resharding_event.lock().unwrap(); - match event.as_ref() { - Some(FlatStorageReshardingEvent::Split(parent_shard, status)) => { - (*parent_shard, status.clone()) - } - None => panic!("a resharding event must exist!"), - } - }; - let SplittingParentStatus { left_child_shard, right_child_shard, .. } = status; + let (parent_shard, status) = get_parent_shard_and_status(&resharder); // Prepare the store object for commits and the iterator over parent's flat storage. let flat_store = resharder.runtime.store().flat_store(); @@ -312,10 +344,13 @@ fn split_shard_task_impl( for _ in 0..BATCH_SIZE { match iter.next() { Some(Ok(kv)) => { - shard_split_handle_key_value(kv, &mut store_update); + if let Err(err) = shard_split_handle_key_value(kv, &mut store_update, &status) { + error!(target: "resharding", ?err, "failed to handle flat storage key"); + return false; + } } Some(Err(err)) => { - error!(target: "resharding", "failed to read flat storage value from parent shard ({err})"); + error!(target: "resharding", ?err, "failed to read flat storage value from parent shard"); return false; } None => { @@ -326,7 +361,7 @@ fn split_shard_task_impl( // Make a pause to commit and check if the routine should stop. 
if let Err(err) = store_update.commit() { - error!(target: "resharding", "failed to commit store update ({err})"); + error!(target: "resharding", ?err, "failed to commit store update"); return false; } @@ -347,21 +382,90 @@ fn split_shard_task_impl( fn shard_split_handle_key_value( kv: (Vec, FlatStateValue), store_update: &mut FlatStoreUpdateAdapter, -) { - // TODO(Trisfald): implement + status: &SplittingParentStatus, +) -> Result<(), std::io::Error> { + let (key, value) = kv; + if key.is_empty() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "flat storage key is empty", + )); + } + let key_column_prefix = key[0]; + let SplittingParentStatus { left_child_shard, right_child_shard, shard_layout, .. } = status; + + // Copies a key value pair to the correct child by matching the account id to the new shard. + let copy_kv_to_child = + |account_id_parser: fn(&[u8]) -> Result| -> Result<(), std::io::Error> { + // Derive the shard uid for this account in the new shard layout. + let account_id = account_id_parser(&key)?; + let new_shard_id = account_id_to_shard_id(&account_id, shard_layout); + let mut new_shard_uid = ShardUId::from_shard_id_and_layout(new_shard_id, &shard_layout); + + // TODO(Trisfald): HACK until ShardLayoutV2 is there. + new_shard_uid.shard_id += 1; + + // Sanity check we are truly writing to one of the expected children shards. + if new_shard_uid != *left_child_shard && new_shard_uid != *right_child_shard { + let err_msg = "account id doesn't map to any child shard!"; + error!(target: "resharding", ?new_shard_uid, ?left_child_shard, ?right_child_shard, ?shard_layout, err_msg); + return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, err_msg)); + } + // Add the new flat store entry. + store_update.set(new_shard_uid, key, Some(value)); + Ok(()) + }; + + match key_column_prefix { + col::ACCOUNT => { + copy_kv_to_child(parse_account_id_from_account_key)?; + } + col::CONTRACT_DATA => todo!(), + col::CONTRACT_CODE => { + copy_kv_to_child(parse_account_id_from_contract_code_key)?; + } + col::ACCESS_KEY => { + copy_kv_to_child(parse_account_id_from_access_key_key)?; + } + col::RECEIVED_DATA => todo!(), + col::POSTPONED_RECEIPT_ID => todo!(), + col::PENDING_DATA_COUNT => todo!(), + col::POSTPONED_RECEIPT => todo!(), + col::DELAYED_RECEIPT_OR_INDICES => todo!(), + _ => unreachable!(), + } + Ok(()) } /// Performs post-processing of shard splitting after all key-values have been moved from parent to children. /// `success` indicates whether or not the previous phase was successful. fn split_shard_task_postprocessing(resharder: FlatStorageResharderInner, success: bool) { + let (parent_shard, status) = get_parent_shard_and_status(&resharder); + let SplittingParentStatus { left_child_shard, right_child_shard, block_hash, .. } = status; + let flat_store = resharder.runtime.store().flat_store(); + let mut store_update = flat_store.store_update(); if success { // Split shard completed successfully. - // TODO(Trisfald): see what to do + // Parent flat storage can be later deleted. + store_update.set_flat_storage_status( + parent_shard, + FlatStorageStatus::Resharding(FlatStorageReshardingStatus::ToBeDeleted), + ); + // TODO(trisfald): trigger parent delete + // Children must perform catchup. 
+ for shard in [left_child_shard, right_child_shard] { + store_update.set_flat_storage_status( + shard, + FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp(block_hash)), + ); + } + // TODO(trisfald): trigger catchup } else { // We got an error or an interrupt request. // Remove children shards leftovers and reset parent shard status. // TODO(Trisfald): implement } + store_update.commit().unwrap(); // Terminate the resharding event. *resharder.resharding_event.lock().unwrap() = None; } @@ -376,9 +480,10 @@ pub enum FlatStorageReshardingEvent { /// Helps control the flat storage resharder operation. More specifically, /// it has a way to know when the background task is done or to interrupt it. +#[derive(Clone)] pub struct FlatStorageResharderController { - /// Set this flag to true if the resharding should be interrupted. - interrupt: AtomicBool, + /// Resharding handle to control interruption. + handle: ReshardingHandle, /// This object will be used to signal when the background task is completed. /// A value of `true` means that the operation completed successfully. completion_sender: Sender, @@ -387,20 +492,25 @@ pub struct FlatStorageResharderController { } impl FlatStorageResharderController { - /// Creates a new `FlatStorageResharderController`. + /// Creates a new `FlatStorageResharderController` with its own handle. pub fn new() -> Self { let (completion_sender, completion_receiver) = crossbeam_channel::bounded(1); - Self { interrupt: AtomicBool::new(false), completion_sender, completion_receiver } + let handle = ReshardingHandle::new(); + Self { handle, completion_sender, completion_receiver } + } + + pub fn from_resharding_handle(handle: ReshardingHandle) -> Self { + let (completion_sender, completion_receiver) = crossbeam_channel::bounded(1); + Self { handle, completion_sender, completion_receiver } } - /// Interrupts any ongoing task. - pub fn interrupt(&self) { - self.interrupt.store(true, std::sync::atomic::Ordering::SeqCst); + pub fn handle(&self) -> &ReshardingHandle { + &self.handle } /// Returns whether or not background task is interrupted. pub fn is_interrupted(&self) -> bool { - self.interrupt.load(std::sync::atomic::Ordering::SeqCst) + !self.handle.get() } } @@ -411,12 +521,15 @@ pub trait FlatStorageResharderScheduler { #[cfg(test)] mod tests { + use std::time::Duration; + use near_async::time::Clock; use near_chain_configs::Genesis; use near_epoch_manager::EpochManager; use near_o11y::testonly::init_test_logger; use near_primitives::{ - shard_layout::ShardLayout, state::FlatStateValue, trie_key::TrieKey, types::AccountId, + hash::CryptoHash, shard_layout::ShardLayout, state::FlatStateValue, trie_key::TrieKey, + types::AccountId, }; use near_store::{genesis::initialize_genesis_state, test_utils::create_test_store}; @@ -502,13 +615,14 @@ mod tests { init_test_logger(); let _resharder = create_fs_resharder(simple_shard_layout()); let _new_shard_layout = shard_layout_after_split(); + let controller = FlatStorageResharderController::new(); // TODO(Trisfald): it won't work until we have shard layout v2. // assert!(resharder.start_resharding_from_new_shard_layout(&new_shard_layout).is_ok()); // Immediately interrupt the resharding. 
- // TODO + controller.handle().stop(); // assert!(resharder.resharding_event.lock().unwrap().is_some()); // assert!(resharder.start_resharding_from_new_shard_layout(&new_shard_layout).is_err()); @@ -520,7 +634,7 @@ mod tests { init_test_logger(); let resharder = create_fs_resharder(simple_shard_layout()); let _new_shard_layout = shard_layout_after_split(); - let mut flat_store = resharder.inner.runtime.store().flat_store(); + let flat_store = resharder.inner.runtime.store().flat_store(); // TODO(Trisfald): it won't work until we have shard layout v2. @@ -576,6 +690,7 @@ mod tests { left_child_shard, right_child_shard, shard_layout, + block_hash: CryptoHash::default(), }); store_update.set_flat_storage_status( parent_shard, @@ -586,7 +701,8 @@ mod tests { // Resume resharding. let scheduler = TestScheduler {}; - let _ = resharder.resume(parent_shard, &resharding_status, &scheduler).unwrap(); + let controller = FlatStorageResharderController::new(); + resharder.resume(parent_shard, &resharding_status, &scheduler, controller).unwrap(); // Children should not contain the random keys written before. for child_shard in [left_child_shard, right_child_shard] { @@ -612,16 +728,29 @@ mod tests { // Perform resharding. let resharder = create_fs_resharder(simple_shard_layout()); let new_shard_layout = shard_layout_after_split(); + let block_hash = CryptoHash::default(); let scheduler = TestScheduler {}; - - let result = - resharder.start_resharding_from_new_shard_layout(&new_shard_layout, &scheduler); + let controller = FlatStorageResharderController::new(); + + let result = resharder.split_shard( + ReshardingSplitParams { + parent_shard: ShardUId { version: 3, shard_id: 1 }, + left_child_shard: ShardUId { version: 3, shard_id: 2 }, + right_child_shard: ShardUId { version: 3, shard_id: 3 }, + }, + &new_shard_layout, + &CryptoHash::default(), + &scheduler, + controller.clone(), + ); + // TODO(Trisfald): replace the above with this simple call + // let result = + // resharder.start_resharding_from_new_shard_layout(&new_shard_layout, &scheduler); assert!(result.is_ok()); - let controller = result.unwrap(); // Check flat storages of children contain the correct accounts. - let left_child = ShardUId::from_shard_id_and_layout(2, &new_shard_layout); - let right_child = ShardUId::from_shard_id_and_layout(3, &new_shard_layout); + let left_child = ShardUId { version: 3, shard_id: 2 }; + let right_child = ShardUId { version: 3, shard_id: 3 }; let flat_store = resharder.inner.runtime.store().flat_store(); let account_mm_key = TrieKey::Account { account_id: account!("mm") }; let account_vv_key = TrieKey::Account { account_id: account!("vv") }; @@ -632,10 +761,27 @@ mod tests { .get(right_child, &account_vv_key.to_vec()) .is_ok_and(|val| val.is_some())); - // Check status of children and parent flat storages. - todo!(); - // Controller should signal that resharding ended. - todo!(); + assert_eq!(controller.completion_receiver.recv_timeout(Duration::from_secs(1)), Ok(true)); + + // Check final status of children and parent flat storages. 
+ let parent = ShardUId { version: 3, shard_id: 1 }; + assert_eq!( + flat_store.get_flat_storage_status(parent), + Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::ToBeDeleted)) + ); + assert_eq!( + flat_store.get_flat_storage_status(left_child), + Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp(block_hash))) + ); + assert_eq!( + flat_store.get_flat_storage_status(left_child), + Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp(block_hash))) + ); + } + + #[test] + fn interrupt_split_shard() { + // TODO(Trisfald): implement } } diff --git a/core/store/src/flat/types.rs b/core/store/src/flat/types.rs index 84c757760b1..108b3090def 100644 --- a/core/store/src/flat/types.rs +++ b/core/store/src/flat/types.rs @@ -188,6 +188,8 @@ pub struct SplittingParentStatus { pub right_child_shard: ShardUId, /// The new shard layout. pub shard_layout: ShardLayout, + /// The block has at which the split began. + pub block_hash: CryptoHash, } pub type FlatStateIterator<'a> = From ae824532ac8a20b524cfa6860d309a088a0a38f1 Mon Sep 17 00:00:00 2001 From: Trisfald Date: Mon, 30 Sep 2024 11:53:48 +0200 Subject: [PATCH 12/36] add implemnetation of flat storage resharding interrupt --- chain/chain-primitives/src/error.rs | 5 + chain/chain/src/flat_storage_resharder.rs | 143 +++++++++++++++++----- core/store/src/adapter/flat_store.rs | 4 + core/store/src/flat/types.rs | 4 +- 4 files changed, 124 insertions(+), 32 deletions(-) diff --git a/chain/chain-primitives/src/error.rs b/chain/chain-primitives/src/error.rs index f394a31addf..759b776c35e 100644 --- a/chain/chain-primitives/src/error.rs +++ b/chain/chain-primitives/src/error.rs @@ -232,6 +232,9 @@ pub enum Error { /// GC error. #[error("GC Error: {0}")] GCError(String), + /// Resharding error. 
+ #[error("Resharding Error: {0}")] + ReshardingError(String), /// Anything else #[error("Other Error: {0}")] Other(String), @@ -269,6 +272,7 @@ impl Error { | Error::CannotBeFinalized | Error::StorageError(_) | Error::GCError(_) + | Error::ReshardingError(_) | Error::DBNotFoundErr(_) => false, Error::InvalidBlockPastTime(_, _) | Error::InvalidBlockFutureTime(_) @@ -392,6 +396,7 @@ impl Error { Error::NotAValidator(_) => "not_a_validator", Error::NotAChunkValidator => "not_a_chunk_validator", Error::InvalidChallengeRoot => "invalid_challenge_root", + Error::ReshardingError(_) => "resharding_error", } } } diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index 57673b72e08..853f951e29c 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -8,7 +8,6 @@ use crossbeam_channel::{Receiver, Sender}; use near_chain_configs::ReshardingHandle; use near_chain_primitives::Error; use near_primitives::{ - hash::CryptoHash, shard_layout::{account_id_to_shard_id, ShardLayout}, state::FlatStateValue, trie_key::{ @@ -22,7 +21,10 @@ use near_primitives::{ }; use near_store::{ adapter::{flat_store::FlatStoreUpdateAdapter, StoreAdapter}, - flat::{FlatStorageReshardingStatus, FlatStorageStatus, SplittingParentStatus}, + flat::{ + FlatStorageReadyStatus, FlatStorageReshardingStatus, FlatStorageStatus, + SplittingParentStatus, + }, ShardUId, StorageError, }; use tracing::{debug, error, info, warn}; @@ -112,19 +114,17 @@ impl FlatStorageResharder { /// /// # Args: /// * `shard_layout`: the new shard layout, it must contain a layout change or an error is returned - /// * `block_hash`: block hash of the block in which the split happens /// * `scheduler`: component used to schedule the background tasks /// * `controller`: manages the execution of the background tasks pub fn start_resharding_from_new_shard_layout( &self, shard_layout: &ShardLayout, - block_hash: &CryptoHash, scheduler: &dyn FlatStorageResharderScheduler, controller: FlatStorageResharderController, ) -> Result<(), Error> { match event_type_from_shard_layout(&shard_layout)? { ReshardingEventType::Split(params) => { - self.split_shard(params, shard_layout, block_hash, scheduler, controller)? + self.split_shard(params, shard_layout, scheduler, controller)? } }; Ok(()) @@ -135,7 +135,6 @@ impl FlatStorageResharder { &self, split_params: ReshardingSplitParams, shard_layout: &ShardLayout, - block_hash: &CryptoHash, scheduler: &dyn FlatStorageResharderScheduler, controller: FlatStorageResharderController, ) -> Result<(), Error> { @@ -144,13 +143,27 @@ impl FlatStorageResharder { info!(target: "resharding", ?parent_shard, ?left_child_shard, ?right_child_shard, "initiating flat storage split"); self.check_no_resharding_in_progress()?; + // Parent shard must be in ready state. + let store = self.inner.runtime.store().flat_store(); + let flat_head = if let FlatStorageStatus::Ready(FlatStorageReadyStatus { flat_head }) = + store + .get_flat_storage_status(parent_shard) + .map_err(|err| Into::::into(err))? + { + flat_head + } else { + let err_msg = "flat storage parent shard is not ready!"; + error!(target: "resharding", ?parent_shard, err_msg); + return Err(Error::ReshardingError(err_msg.to_owned())); + }; + // Change parent and children shards flat storage status. 
- let mut store_update = self.inner.runtime.store().flat_store().store_update(); + let mut store_update = store.store_update(); let status = SplittingParentStatus { left_child_shard, right_child_shard, shard_layout: shard_layout.clone(), - block_hash: *block_hash, + flat_head, }; store_update.set_flat_storage_status( parent_shard, @@ -248,8 +261,9 @@ fn event_type_from_shard_layout(shard_layout: &ShardLayout) -> Result { - error!(target: "resharding", ?shard_layout, "unsupported shard layout!"); - return Err(Error::Other("resharding: unsupported shard layout".to_string())); + let err_msg = "unsupported shard layout!"; + error!(target: "resharding", ?shard_layout, err_msg); + return Err(Error::ReshardingError(err_msg.to_owned())); } } @@ -269,20 +283,17 @@ fn event_type_from_shard_layout(shard_layout: &ShardLayout) -> Result>>, + } + + impl DelayedScheduler { + fn call(&self) { + self.callable.take().unwrap()(); + } + } + + impl FlatStorageResharderScheduler for DelayedScheduler { + fn schedule(&self, f: Box) { + *self.callable.borrow_mut() = Some(f); + } + } + /// Simple shard layout with two shards. fn simple_shard_layout() -> ShardLayout { // TODO(Trisfald): use shard layout v2 @@ -690,7 +733,12 @@ mod tests { left_child_shard, right_child_shard, shard_layout, - block_hash: CryptoHash::default(), + // Values don't matter. + flat_head: BlockInfo { + hash: CryptoHash::default(), + height: 1, + prev_hash: CryptoHash::default(), + }, }); store_update.set_flat_storage_status( parent_shard, @@ -739,7 +787,6 @@ mod tests { right_child_shard: ShardUId { version: 3, shard_id: 3 }, }, &new_shard_layout, - &CryptoHash::default(), &scheduler, controller.clone(), ); @@ -782,6 +829,42 @@ mod tests { #[test] fn interrupt_split_shard() { - // TODO(Trisfald): implement + init_test_logger(); + // Perform resharding. + let resharder = create_fs_resharder(simple_shard_layout()); + let new_shard_layout = shard_layout_after_split(); + let scheduler = DelayedScheduler::default(); + let controller = FlatStorageResharderController::new(); + + assert!(resharder + .start_resharding_from_new_shard_layout( + &new_shard_layout, + &scheduler, + controller.clone() + ) + .is_ok()); + let (parent_shard, status) = get_parent_shard_and_status(&resharder.inner); + let SplittingParentStatus { left_child_shard, right_child_shard, flat_head, .. } = status; + + // Interrupt the task before it starts. + controller.handle().stop(); + + // Run the task. + scheduler.call(); + + // Check that resharding was effectively interrupted. 
+ let flat_store = resharder.inner.runtime.store().flat_store(); + assert_eq!(controller.completion_receiver.recv_timeout(Duration::from_secs(1)), Ok(false)); + assert_eq!( + flat_store.get_flat_storage_status(parent_shard), + Ok(FlatStorageStatus::Ready(FlatStorageReadyStatus { flat_head })) + ); + for child_shard in [left_child_shard, right_child_shard] { + assert_eq!( + flat_store.get_flat_storage_status(status.left_child_shard), + Ok(FlatStorageStatus::Empty) + ); + assert_eq!(flat_store.iter(child_shard).count(), 0); + } } } diff --git a/core/store/src/adapter/flat_store.rs b/core/store/src/adapter/flat_store.rs index 07eabea27e6..aab223e4a91 100644 --- a/core/store/src/adapter/flat_store.rs +++ b/core/store/src/adapter/flat_store.rs @@ -244,6 +244,10 @@ impl<'a> FlatStoreUpdateAdapter<'a> { .expect("Borsh should not have failed here") } + pub fn remove_status(&mut self, shard_uid: ShardUId) { + self.store_update.delete(DBCol::FlatStorageStatus, &shard_uid.to_bytes()); + } + pub fn set_delta(&mut self, shard_uid: ShardUId, delta: &FlatStateDelta) { let key = KeyForFlatStateDelta { shard_uid, block_hash: delta.metadata.block.hash }.to_bytes(); diff --git a/core/store/src/flat/types.rs b/core/store/src/flat/types.rs index 108b3090def..15e5b883608 100644 --- a/core/store/src/flat/types.rs +++ b/core/store/src/flat/types.rs @@ -188,8 +188,8 @@ pub struct SplittingParentStatus { pub right_child_shard: ShardUId, /// The new shard layout. pub shard_layout: ShardLayout, - /// The block has at which the split began. - pub block_hash: CryptoHash, + /// Parent's flat head state when the split began. + pub flat_head: BlockInfo, } pub type FlatStateIterator<'a> = From e735755ec626e2e5a8fec3ea5c2947c6d842c6e9 Mon Sep 17 00:00:00 2001 From: Trisfald Date: Mon, 30 Sep 2024 12:12:09 +0200 Subject: [PATCH 13/36] change tests setup to use a test chain --- chain/chain/src/flat_storage_resharder.rs | 45 ++++++++++++++++------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index 853f951e29c..a2d003b1dec 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -546,8 +546,8 @@ mod tests { use std::{cell::RefCell, time::Duration}; use near_async::time::Clock; - use near_chain_configs::Genesis; - use near_epoch_manager::EpochManager; + use near_chain_configs::{Genesis, MutableConfigValue}; + use near_epoch_manager::{shard_tracker::ShardTracker, EpochManager}; use near_o11y::testonly::init_test_logger; use near_primitives::{ hash::CryptoHash, shard_layout::ShardLayout, state::FlatStateValue, trie_key::TrieKey, @@ -559,7 +559,10 @@ mod tests { test_utils::create_test_store, }; - use crate::runtime::NightshadeRuntime; + use crate::{ + rayon_spawner::RayonAsyncComputationSpawner, runtime::NightshadeRuntime, + types::ChainConfig, Chain, ChainGenesis, DoomslugThresholdMode, + }; use super::*; @@ -608,7 +611,7 @@ mod tests { } /// Generic test setup. 
- fn create_fs_resharder(shard_layout: ShardLayout) -> FlatStorageResharder { + fn create_fs_resharder(shard_layout: ShardLayout) -> (Chain, FlatStorageResharder) { let num_shards = shard_layout.shard_ids().count(); let genesis = Genesis::test_with_seeds( Clock::real(), @@ -621,9 +624,25 @@ mod tests { let store = create_test_store(); initialize_genesis_state(store.clone(), &genesis, Some(tempdir.path())); let epoch_manager = EpochManager::new_arc_handle(store.clone(), &genesis.config); + let shard_tracker = ShardTracker::new_empty(epoch_manager.clone()); let runtime = - NightshadeRuntime::test(tempdir.path(), store, &genesis.config, epoch_manager); - FlatStorageResharder::new(runtime) + NightshadeRuntime::test(tempdir.path(), store, &genesis.config, epoch_manager.clone()); + let chain_genesis = ChainGenesis::new(&genesis.config); + let chain = Chain::new( + Clock::real(), + epoch_manager, + shard_tracker, + runtime.clone(), + &chain_genesis, + DoomslugThresholdMode::NoApprovals, + ChainConfig::test(), + None, + Arc::new(RayonAsyncComputationSpawner), + MutableConfigValue::new(None, "validator_signer"), + ) + .unwrap(); + + (chain, FlatStorageResharder::new(runtime)) } /// Verify that the correct type of resharding is deduced from a new shard layout. @@ -675,7 +694,7 @@ mod tests { #[test] fn flat_storage_split_status_set() { init_test_logger(); - let resharder = create_fs_resharder(simple_shard_layout()); + let (_, resharder) = create_fs_resharder(simple_shard_layout()); let _new_shard_layout = shard_layout_after_split(); let flat_store = resharder.inner.runtime.store().flat_store(); @@ -709,7 +728,7 @@ mod tests { #[test] fn resume_split_starts_from_clean_state() { init_test_logger(); - let resharder = create_fs_resharder(simple_shard_layout()); + let (_, resharder) = create_fs_resharder(simple_shard_layout()); let flat_store = resharder.inner.runtime.store().flat_store(); let shard_layout = shard_layout_after_split(); let resharding_type = event_type_from_shard_layout(&shard_layout).unwrap(); @@ -774,9 +793,8 @@ mod tests { fn simple_split_shard() { init_test_logger(); // Perform resharding. - let resharder = create_fs_resharder(simple_shard_layout()); + let (chain, resharder) = create_fs_resharder(simple_shard_layout()); let new_shard_layout = shard_layout_after_split(); - let block_hash = CryptoHash::default(); let scheduler = TestScheduler {}; let controller = FlatStorageResharderController::new(); @@ -817,13 +835,14 @@ mod tests { flat_store.get_flat_storage_status(parent), Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::ToBeDeleted)) ); + let last_hash = chain.head().unwrap().last_block_hash; assert_eq!( flat_store.get_flat_storage_status(left_child), - Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp(block_hash))) + Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp(last_hash))) ); assert_eq!( flat_store.get_flat_storage_status(left_child), - Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp(block_hash))) + Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp(last_hash))) ); } @@ -831,7 +850,7 @@ mod tests { fn interrupt_split_shard() { init_test_logger(); // Perform resharding. 
- let resharder = create_fs_resharder(simple_shard_layout()); + let (_, resharder) = create_fs_resharder(simple_shard_layout()); let new_shard_layout = shard_layout_after_split(); let scheduler = DelayedScheduler::default(); let controller = FlatStorageResharderController::new(); From fc38090c821cad03612abd5b0445ab088cc007e1 Mon Sep 17 00:00:00 2001 From: Trisfald Date: Mon, 30 Sep 2024 14:22:54 +0200 Subject: [PATCH 14/36] handle more DB columns --- chain/chain/src/flat_storage_resharder.rs | 50 +++++++++++++++-------- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index a2d003b1dec..06e5cb6ebd7 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -11,10 +11,11 @@ use near_primitives::{ shard_layout::{account_id_to_shard_id, ShardLayout}, state::FlatStateValue, trie_key::{ - col, + col::{self, ALL_COLUMNS_WITH_NAMES}, trie_key_parsers::{ parse_account_id_from_access_key_key, parse_account_id_from_account_key, - parse_account_id_from_contract_code_key, + parse_account_id_from_contract_code_key, parse_account_id_from_contract_data_key, + parse_account_id_from_received_data_key, parse_account_id_from_trie_key_with_separator, }, }, types::AccountId, @@ -428,21 +429,38 @@ fn shard_split_handle_key_value( }; match key_column_prefix { - col::ACCOUNT => { - copy_kv_to_child(parse_account_id_from_account_key)?; - } - col::CONTRACT_DATA => todo!(), - col::CONTRACT_CODE => { - copy_kv_to_child(parse_account_id_from_contract_code_key)?; - } - col::ACCESS_KEY => { - copy_kv_to_child(parse_account_id_from_access_key_key)?; - } - col::RECEIVED_DATA => todo!(), - col::POSTPONED_RECEIPT_ID => todo!(), - col::PENDING_DATA_COUNT => todo!(), - col::POSTPONED_RECEIPT => todo!(), + col::ACCOUNT => copy_kv_to_child(parse_account_id_from_account_key)?, + col::CONTRACT_DATA => copy_kv_to_child(parse_account_id_from_contract_data_key)?, + col::CONTRACT_CODE => copy_kv_to_child(parse_account_id_from_contract_code_key)?, + col::ACCESS_KEY => copy_kv_to_child(parse_account_id_from_access_key_key)?, + col::RECEIVED_DATA => copy_kv_to_child(parse_account_id_from_received_data_key)?, + col::POSTPONED_RECEIPT_ID => copy_kv_to_child(|raw_key: &[u8]| { + parse_account_id_from_trie_key_with_separator( + col::POSTPONED_RECEIPT_ID, + raw_key, + ALL_COLUMNS_WITH_NAMES[col::POSTPONED_RECEIPT_ID as usize].1, + ) + })?, + col::PENDING_DATA_COUNT => copy_kv_to_child(|raw_key: &[u8]| { + parse_account_id_from_trie_key_with_separator( + col::PENDING_DATA_COUNT, + raw_key, + ALL_COLUMNS_WITH_NAMES[col::PENDING_DATA_COUNT as usize].1, + ) + })?, + col::POSTPONED_RECEIPT => copy_kv_to_child(|raw_key: &[u8]| { + parse_account_id_from_trie_key_with_separator( + col::POSTPONED_RECEIPT, + raw_key, + ALL_COLUMNS_WITH_NAMES[col::POSTPONED_RECEIPT as usize].1, + ) + })?, col::DELAYED_RECEIPT_OR_INDICES => todo!(), + col::PROMISE_YIELD_INDICES => todo!(), + col::PROMISE_YIELD_TIMEOUT => todo!(), + col::PROMISE_YIELD_RECEIPT => todo!(), + col::BUFFERED_RECEIPT_INDICES => todo!(), + col::BUFFERED_RECEIPT => todo!(), _ => unreachable!(), } Ok(()) From 75c42b5a2a47448d4ff3155caf2bf3c7477636a4 Mon Sep 17 00:00:00 2001 From: Trisfald Date: Mon, 30 Sep 2024 16:14:01 +0200 Subject: [PATCH 15/36] improve comments --- chain/chain/src/flat_storage_resharder.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/chain/chain/src/flat_storage_resharder.rs 
b/chain/chain/src/flat_storage_resharder.rs
index 06e5cb6ebd7..d88855134a8 100644
--- a/chain/chain/src/flat_storage_resharder.rs
+++ b/chain/chain/src/flat_storage_resharder.rs
@@ -47,8 +47,9 @@ use crate::types::RuntimeAdapter;
 /// The parent shard storage is not needed anymore and can be removed.
 ///
 /// The resharder also has the following properties:
-/// - Background processing: the bulk of resharding is done in a separate task
-/// - Interruptible: a reshard can be interrupted
+/// - Background processing: the bulk of resharding is done in a separate task, see [FlatStorageResharderScheduler].
+/// - Interruptible: a reshard operation can be interrupted through a [FlatStorageResharderController].
+///   In the case of a `Split` event, if the task fails or is interrupted the state of flat storage goes back to what it was before the split.
 pub struct FlatStorageResharder {
     inner: FlatStorageResharderInner,
 }
@@ -141,7 +142,7 @@ impl FlatStorageResharder {
     ) -> Result<(), Error> {
         let ReshardingSplitParams { parent_shard, left_child_shard, right_child_shard } =
             split_params;
-        info!(target: "resharding", ?parent_shard, ?left_child_shard, ?right_child_shard, "initiating flat storage split");
+        info!(target: "resharding", ?parent_shard, ?left_child_shard, ?right_child_shard, "initiating flat storage shard split");
         self.check_no_resharding_in_progress()?;

         // Parent shard must be in ready state.
@@ -216,7 +217,7 @@ impl FlatStorageResharder {
     ) {
         let event = FlatStorageReshardingEvent::Split(parent_shard, status.clone());
         self.set_resharding_event(event);
-        debug!(target: "resharding", ?parent_shard, "scheduling flat storage split: copy of key-value pairs");
+        info!(target: "resharding", ?parent_shard, ?status, "scheduling flat storage shard split");

         let resharder = self.inner.clone();
         let task = Box::new(move || split_shard_task(resharder, controller));
@@ -344,6 +345,8 @@ fn split_shard_task_impl(

     let (parent_shard, status) = get_parent_shard_and_status(&resharder);

+    info!(target: "resharding", ?parent_shard, "flat storage shard split task: starting key-values copy");
+
     // Prepare the store object for commits and the iterator over parent's flat storage.
     let flat_store = resharder.runtime.store().flat_store();
     let mut iter = flat_store.iter(parent_shard);
@@ -472,6 +475,8 @@ fn split_shard_task_postprocessing(resharder: FlatStorageResharderInner, success
     let (parent_shard, status) = get_parent_shard_and_status(&resharder);
     let SplittingParentStatus { left_child_shard, right_child_shard, flat_head, .. } = status;
     let flat_store = resharder.runtime.store().flat_store();
+    info!(target: "resharding", ?parent_shard, "flat storage shard split task: post-processing");
+
     let mut store_update = flat_store.store_update();

From 306c502f53f2482486798ad5dbd03882335378df Mon Sep 17 00:00:00 2001
From: Trisfald
Date: Mon, 30 Sep 2024 17:00:04 +0200
Subject: [PATCH 16/36] fix unit tests

---
 chain/chain/src/flat_storage_resharder.rs | 130 ++++++++++++++--------
 core/primitives/src/shard_layout.rs | 11 +-
 2 files changed, 95 insertions(+), 46 deletions(-)

diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs
index d88855134a8..f8930643831 100644
--- a/chain/chain/src/flat_storage_resharder.rs
+++ b/chain/chain/src/flat_storage_resharder.rs
@@ -239,12 +239,14 @@ impl FlatStorageResharder {
 }

 /// Struct used to destructure a new shard layout definition into the resulting resharding event.
+#[derive(Debug)]
 #[cfg_attr(test, derive(PartialEq, Eq))]
 enum ReshardingEventType {
     /// Split of a shard.
     Split(ReshardingSplitParams),
 }

+#[derive(Debug)]
 #[cfg_attr(test, derive(PartialEq, Eq))]
 struct ReshardingSplitParams {
     // Shard being split.
     parent_shard: ShardUId,
     // Child to the left of the account boundary.
     left_child_shard: ShardUId,
     // Child to the right of the account boundary.
     right_child_shard: ShardUId,
 }

fn event_type_from_shard_layout(shard_layout: &ShardLayout) -> Result<ReshardingEventType, Error> {
+        // Supported.
+        }
     }
-    let event = None;
-    // Look for a shard having exactly two children, to trigger a split.
-    for shard in shard_layout.shard_ids() {
-        let parent = shard_layout.get_parent_shard_id(shard)?;
-        if let Some(children) = shard_layout.get_children_shards_uids(parent) {
+    let mut event = None;
+
+    let error_two_reshardings = || {
+        let err_msg = "can't perform two reshardings at the same time!";
+        error!(target: "resharding", ?shard_layout, err_msg);
+        return Err(Error::ReshardingError(err_msg.to_owned()));
+    };
+
+    for shard_id in shard_layout.shard_ids() {
+        // Look for a shard having exactly two children, to detect a split.
+        // - retrieve the parent shard
+        // - if the parent has two children create the split event
+        let parent_shard_id = shard_layout.get_parent_shard_id(shard_id)?;
+        if let Some(children) = shard_layout.get_children_shards_uids(parent_shard_id) {
             if children.len() == 2 {
-                if event.is_none() {
-                    let parent_shard = ShardUId::from_shard_id_and_layout(parent, &shard_layout);
-                    let left_child_shard = children[0];
-                    let right_child_shard = children[1];
-                    event = Some(ReshardingEventType::Split(ReshardingSplitParams {
-                        parent_shard,
-                        left_child_shard,
-                        right_child_shard,
-                    }))
-                } else {
-                    let err_msg = "can't perform two reshardings at the same time!";
-                    error!(target: "resharding", ?shard_layout, err_msg);
-                    return Err(Error::ReshardingError(err_msg.to_owned()));
+                match &event {
+                    None => {
+                        let parent_shard = ShardUId {
+                            version: shard_layout.version(),
+                            shard_id: parent_shard_id as u32,
+                        };
+                        let left_child_shard = children[0];
+                        let right_child_shard = children[1];
+                        event = Some(ReshardingEventType::Split(ReshardingSplitParams {
+                            parent_shard,
+                            left_child_shard,
+                            right_child_shard,
+                        }))
+                    }
+                    Some(ReshardingEventType::Split(split)) => {
+                        // It's fine only if this shard is already a child of the existing event.
+                        if split.left_child_shard.shard_id() != shard_id
+                            && split.right_child_shard.shard_id() != shard_id
+                        {
+                            return error_two_reshardings();
+                        }
+                    }
+                }
             }
         }
     }
+    // We must have found at least one resharding event by now.
     event.ok_or_else(|| {
         let err_msg = "no supported shard layout change found!";
         error!(target: "resharding", ?shard_layout, err_msg);
         Error::ReshardingError(err_msg.to_owned())
     })
@@ -415,10 +439,7 @@ fn shard_split_handle_key_value(
         // Derive the shard uid for this account in the new shard layout.
         let account_id = account_id_parser(&key)?;
         let new_shard_id = account_id_to_shard_id(&account_id, shard_layout);
-        let mut new_shard_uid = ShardUId::from_shard_id_and_layout(new_shard_id, &shard_layout);
-
-        // TODO(Trisfald): HACK until ShardLayoutV2 is there.
-        new_shard_uid.shard_id += 1;
+        let new_shard_uid = ShardUId::from_shard_id_and_layout(new_shard_id, &shard_layout);

         // Sanity check we are truly writing to one of the expected children shards.
if new_shard_uid != *left_child_shard && new_shard_uid != *right_child_shard { @@ -566,7 +587,7 @@ pub trait FlatStorageResharderScheduler { #[cfg(test)] mod tests { - use std::{cell::RefCell, time::Duration}; + use std::{cell::RefCell, collections::BTreeMap, time::Duration}; use near_async::time::Clock; use near_chain_configs::{Genesis, MutableConfigValue}; @@ -623,14 +644,14 @@ mod tests { /// Simple shard layout with two shards. fn simple_shard_layout() -> ShardLayout { - // TODO(Trisfald): use shard layout v2 - ShardLayout::v1(vec![account!("ff")], None, 3) + let shards_split_map = BTreeMap::from([(0, vec![0]), (1, vec![1])]); + ShardLayout::v2(vec![account!("ff")], vec![0, 1], Some(shards_split_map)) } /// Derived from [simple_shard_layout] by splitting the second shard. fn shard_layout_after_split() -> ShardLayout { - // TODO(Trisfald): use shard layout v2 - ShardLayout::v1(vec![account!("ff"), account!("pp")], Some(vec![vec![0], vec![1, 2]]), 3) + let shards_split_map = BTreeMap::from([(0, vec![0]), (1, vec![2, 3])]); + ShardLayout::v2(vec![account!("ff"), account!("pp")], vec![0, 2, 3], Some(shards_split_map)) } /// Generic test setup. @@ -680,16 +701,22 @@ mod tests { // Single split shard is ok. let layout = shard_layout_after_split(); - let _event_type = event_type_from_shard_layout(&layout); - // TODO(Trisfald): it won't work until we have shard layout v2. - // assert_eq!(event_type, ReshardingEventType::Split(...)); + let event_type = event_type_from_shard_layout(&layout).unwrap(); + assert_eq!( + event_type, + ReshardingEventType::Split(ReshardingSplitParams { + parent_shard: ShardUId { version: 3, shard_id: 1 }, + left_child_shard: ShardUId { version: 3, shard_id: 2 }, + right_child_shard: ShardUId { version: 3, shard_id: 3 } + }) + ); // Double split shard is not ok. - // TODO(Trisfald): use shard layout v2 - let layout = ShardLayout::v1( + let shards_split_map = BTreeMap::from([(0, vec![2, 3]), (1, vec![4, 5])]); + let layout = ShardLayout::v2( vec![account!("ff"), account!("pp"), account!("ss")], - Some(vec![vec![0, 2], vec![1, 2]]), - 3, + vec![2, 3, 4, 5], + Some(shards_split_map), ); assert!(event_type_from_shard_layout(&layout).is_err()); } @@ -698,19 +725,30 @@ mod tests { #[test] fn concurrent_reshardings_are_disallowed() { init_test_logger(); - let _resharder = create_fs_resharder(simple_shard_layout()); - let _new_shard_layout = shard_layout_after_split(); + let (_, resharder) = create_fs_resharder(simple_shard_layout()); + let new_shard_layout = shard_layout_after_split(); + let scheduler = DelayedScheduler::default(); let controller = FlatStorageResharderController::new(); - // TODO(Trisfald): it won't work until we have shard layout v2. - - // assert!(resharder.start_resharding_from_new_shard_layout(&new_shard_layout).is_ok()); + assert!(resharder + .start_resharding_from_new_shard_layout( + &new_shard_layout, + &scheduler, + controller.clone() + ) + .is_ok()); // Immediately interrupt the resharding. controller.handle().stop(); - // assert!(resharder.resharding_event.lock().unwrap().is_some()); - // assert!(resharder.start_resharding_from_new_shard_layout(&new_shard_layout).is_err()); + assert!(resharder.resharding_event().is_some()); + assert!(resharder + .start_resharding_from_new_shard_layout( + &new_shard_layout, + &scheduler, + controller.clone() + ) + .is_err()); } /// Flat storage shard status should be set correctly upon starting a shard split. 
@@ -718,12 +756,14 @@ mod tests { fn flat_storage_split_status_set() { init_test_logger(); let (_, resharder) = create_fs_resharder(simple_shard_layout()); - let _new_shard_layout = shard_layout_after_split(); + let new_shard_layout = shard_layout_after_split(); + let scheduler = DelayedScheduler::default(); + let controller = FlatStorageResharderController::new(); let flat_store = resharder.inner.runtime.store().flat_store(); - // TODO(Trisfald): it won't work until we have shard layout v2. - - // assert!(resharder.start_resharding_from_new_shard_layout(&new_shard_layout).is_ok()); + assert!(resharder + .start_resharding_from_new_shard_layout(&new_shard_layout, &scheduler, controller) + .is_ok()); let resharding_event = resharder.resharding_event(); match resharding_event.unwrap() { diff --git a/core/primitives/src/shard_layout.rs b/core/primitives/src/shard_layout.rs index c825282dcd3..b8b0caaa0c8 100644 --- a/core/primitives/src/shard_layout.rs +++ b/core/primitives/src/shard_layout.rs @@ -149,7 +149,16 @@ impl ShardLayoutV1 { } /// Making the shard ids non-contiguous. -#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] +#[derive( + BorshSerialize, + BorshDeserialize, + serde::Serialize, + serde::Deserialize, + Clone, + Debug, + PartialEq, + Eq, +)] pub struct ShardLayoutV2 { /// The boundary accounts are the accounts on boundaries between shards. /// Each shard contains a range of accounts from one boundary account to From b9f11d4cc28bff2b6bf2e73131e53163199fa2da Mon Sep 17 00:00:00 2001 From: Trisfald Date: Tue, 1 Oct 2024 11:31:16 +0200 Subject: [PATCH 17/36] add parent shard deletion in task --- chain/chain/src/flat_storage_resharder.rs | 27 +++++------------------ core/store/src/adapter/flat_store.rs | 6 +++++ core/store/src/flat/types.rs | 3 --- 3 files changed, 12 insertions(+), 24 deletions(-) diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index f8930643831..4c0f7c1902c 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -102,10 +102,6 @@ impl FlatStorageResharder { // TODO(Trisfald): implement child catch up todo!() } - FlatStorageReshardingStatus::ToBeDeleted => { - // Parent shard's content has been previously copied to the children. - // Nothing else to do. - } } Ok(()) } @@ -501,11 +497,8 @@ fn split_shard_task_postprocessing(resharder: FlatStorageResharderInner, success let mut store_update = flat_store.store_update(); if success { // Split shard completed successfully. - // Parent flat storage can be later deleted. - store_update.set_flat_storage_status( - parent_shard, - FlatStorageStatus::Resharding(FlatStorageReshardingStatus::ToBeDeleted), - ); + // Parent flat storage can be deleted. + store_update.remove_flat_storage(parent_shard); // TODO(trisfald): trigger parent delete // Children must perform catchup. for child_shard in [left_child_shard, right_child_shard] { @@ -526,9 +519,7 @@ fn split_shard_task_postprocessing(resharder: FlatStorageResharderInner, success ); // Remove children shards leftovers. 
for child_shard in [left_child_shard, right_child_shard] {
-            store_update.remove_all_deltas(child_shard);
-            store_update.remove_all(child_shard);
-            store_update.remove_status(child_shard);
+            store_update.remove_flat_storage(child_shard);
         }
     }
     store_update.commit().unwrap();
@@ -743,11 +734,7 @@ mod tests {
         assert!(resharder.resharding_event().is_some());
         assert!(resharder
-            .start_resharding_from_new_shard_layout(
-                &new_shard_layout,
-                &scheduler,
-                controller.clone()
-            )
+            .start_resharding_from_new_shard_layout(&new_shard_layout, &scheduler, controller)
             .is_err());
     }

@@ -894,10 +881,8 @@ mod tests {
         // Check final status of children and parent flat storages.
         let parent = ShardUId { version: 3, shard_id: 1 };
-        assert_eq!(
-            flat_store.get_flat_storage_status(parent),
-            Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::ToBeDeleted))
-        );
+        assert_eq!(flat_store.get_flat_storage_status(parent), Ok(FlatStorageStatus::Empty));
+        assert_eq!(flat_store.iter(parent).count(), 0);
         let last_hash = chain.head().unwrap().last_block_hash;
         assert_eq!(
             flat_store.get_flat_storage_status(left_child),

diff --git a/core/store/src/adapter/flat_store.rs b/core/store/src/adapter/flat_store.rs
index 488985937dc..43977dd1baf 100644
--- a/core/store/src/adapter/flat_store.rs
+++ b/core/store/src/adapter/flat_store.rs
@@ -270,6 +270,12 @@ impl<'a> FlatStoreUpdateAdapter<'a> {
         self.remove_range_by_shard_uid(shard_uid, DBCol::FlatStateDeltaMetadata);
     }

+    pub fn remove_flat_storage(&mut self, shard_uid: ShardUId) {
+        self.remove_all_deltas(shard_uid);
+        self.remove_all(shard_uid);
+        self.remove_status(shard_uid);
+    }
+
     // helper
     fn remove_range_by_shard_uid(&mut self, shard_uid: ShardUId, col: DBCol) {
         let key_from = shard_uid.to_bytes();

diff --git a/core/store/src/flat/types.rs b/core/store/src/flat/types.rs
index 15e5b883608..2c97aa62060 100644
--- a/core/store/src/flat/types.rs
+++ b/core/store/src/flat/types.rs
@@ -82,7 +82,6 @@ impl Into<i64> for &FlatStorageStatus {
                 FlatStorageReshardingStatus::SplittingParent(_) => 20,
                 FlatStorageReshardingStatus::CreatingChild => 21,
                 FlatStorageReshardingStatus::CatchingUp(_) => 22,
-                FlatStorageReshardingStatus::ToBeDeleted => 23,
             },
         }
     }
@@ -150,8 +149,6 @@ pub enum FlatStorageReshardingStatus {
     /// We apply deltas from disk until the head reaches final head.
     /// Includes block hash of flat storage head.
     CatchingUp(CryptoHash),
-    /// The shard no longer exists and its content should be deleted.
-    ToBeDeleted,
 }

 /// Current step of fetching state to fill flat storage.
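With this patch the life cycle of a split is settled: on success the parent flat storage is wiped and both children are marked as catching up from the parent's recorded flat head; on failure or interruption the parent goes back to ready and any partially written child data is removed. The following self-contained Rust sketch condenses those transitions; it uses stand-in types (`Status`, `postprocess`) rather than the real `near_store::flat` definitions, purely for illustration:

    // Condensed, illustrative mirror of the transitions performed by
    // `split_shard_task_postprocessing` as of this patch.
    type BlockHash = [u8; 32];

    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    enum Status {
        Ready,                 // flat storage is usable again (rollback case)
        CatchingUp(BlockHash), // child must replay deltas from the parent's flat head
        Empty,                 // no flat storage data remains for the shard
    }

    // Returns (parent status, [left child status, right child status]).
    fn postprocess(success: bool, parent_flat_head: BlockHash) -> (Status, [Status; 2]) {
        if success {
            // Parent is removed entirely (deltas, values, status); children catch up.
            (Status::Empty, [Status::CatchingUp(parent_flat_head); 2])
        } else {
            // Error or interruption: parent restored, children leftovers wiped.
            (Status::Ready, [Status::Empty; 2])
        }
    }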
From 7fe014e295b6d0a739e30880049716674c01e69b Mon Sep 17 00:00:00 2001 From: Trisfald Date: Tue, 1 Oct 2024 11:37:38 +0200 Subject: [PATCH 18/36] update protocol schema --- core/primitives/src/shard_layout.rs | 1 + .../res/protocol_schema.toml | 23 +++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/core/primitives/src/shard_layout.rs b/core/primitives/src/shard_layout.rs index b8b0caaa0c8..fe3d530971b 100644 --- a/core/primitives/src/shard_layout.rs +++ b/core/primitives/src/shard_layout.rs @@ -58,6 +58,7 @@ pub type ShardVersion = u32; Debug, PartialEq, Eq, + ProtocolSchema, )] pub enum ShardLayout { V0(ShardLayoutV0), diff --git a/tools/protocol-schema-check/res/protocol_schema.toml b/tools/protocol-schema-check/res/protocol_schema.toml index 7c138ad6180..64c2690aa82 100644 --- a/tools/protocol-schema-check/res/protocol_schema.toml +++ b/tools/protocol-schema-check/res/protocol_schema.toml @@ -103,11 +103,11 @@ EpochInfoV4 = 434230701 EpochSummary = 742414117 EpochValidatorInfo = 378323971 ExecutionMetadata = 3853243413 -ExecutionOutcome = 1325623645 -ExecutionOutcomeWithId = 36999569 -ExecutionOutcomeWithIdAndProof = 4044381219 -ExecutionOutcomeWithProof = 4104315440 -ExecutionStatus = 1810006625 +ExecutionOutcome = 2925419955 +ExecutionOutcomeWithId = 1289816961 +ExecutionOutcomeWithIdAndProof = 3179626578 +ExecutionOutcomeWithProof = 2219338929 +ExecutionStatus = 3681865123 ExtCosts = 1172935704 FetchingStateStatus = 2204896805 FlatStateChanges = 2811133731 @@ -115,7 +115,8 @@ FlatStateDeltaMetadata = 3401366797 FlatStateValue = 83834662 FlatStorageCreationStatus = 3717607657 FlatStorageReadyStatus = 677315221 -FlatStorageStatus = 1026335026 +FlatStorageReshardingStatus = 2079135057 +FlatStorageStatus = 1510148841 FunctionCallAction = 2405840012 FunctionCallError = 3652274053 FunctionCallPermission = 1517509673 @@ -126,7 +127,7 @@ HostError = 3173968216 IgnoredVecU8 = 1855789801 IntegerOverflowError = 2542362165 InvalidAccessKeyError = 2954698659 -InvalidTxError = 1219344901 +InvalidTxError = 2090866399 KeyForFlatStateDelta = 2002998927 LatestKnown = 2945167085 LatestWitnessesInfo = 2488443612 @@ -183,7 +184,7 @@ RoutedMessageBody = 4241045537 RoutingTableUpdate = 2987752645 Secp256K1PublicKey = 4117078281 Secp256K1Signature = 3687154735 -ServerError = 3794571225 +ServerError = 2338793369 ShardChunk = 834871798 ShardChunkHeader = 4215449923 ShardChunkHeaderInner = 3760333502 @@ -195,6 +196,7 @@ ShardChunkHeaderV2 = 3706194757 ShardChunkHeaderV3 = 2763275079 ShardChunkV1 = 1814805625 ShardChunkV2 = 1857597167 +ShardLayout = 3421343543 ShardProof = 2773021473 ShardStateSyncResponse = 2185281594 ShardStateSyncResponseHeaderV1 = 2708725662 @@ -209,6 +211,7 @@ SignedTransaction = 3898692301 SlashState = 3264273950 SlashedValidator = 2601657743 SnapshotHostInfo = 278564957 +SplittingParentStatus = 4074912992 StakeAction = 2002027105 StateChangeCause = 1569242014 StateHeaderKey = 1385533899 @@ -222,7 +225,7 @@ StateStoredReceipt = 3853311293 StateStoredReceiptMetadata = 2895538362 StateStoredReceiptV0 = 4029868827 StateSyncDumpProgress = 2225888613 -StorageError = 1838871872 +StorageError = 2572184728 StoredChunkStateTransitionData = 516372819 String = 2587724713 SyncSnapshotHosts = 4230057383 @@ -236,7 +239,7 @@ TrieKey = 768968236 TrieQueueIndices = 2601394796 TrieRefcountAddition = 2117109883 TrieRefcountSubtraction = 2150368599 -TxExecutionError = 706862037 +TxExecutionError = 214948980 VMKind = 2110212047 ValidatorKickoutReason = 2362237969 
ValidatorKickoutView = 2660746751 From d02fc52015f2d3562bb22075c5d189eee026034d Mon Sep 17 00:00:00 2001 From: Trisfald Date: Tue, 1 Oct 2024 11:38:36 +0200 Subject: [PATCH 19/36] remove todo --- chain/chain/src/flat_storage_resharder.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index 4c0f7c1902c..19a3a1ced54 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -499,7 +499,6 @@ fn split_shard_task_postprocessing(resharder: FlatStorageResharderInner, success // Split shard completed successfully. // Parent flat storage can be deleted. store_update.remove_flat_storage(parent_shard); - // TODO(trisfald): trigger parent delete // Children must perform catchup. for child_shard in [left_child_shard, right_child_shard] { store_update.set_flat_storage_status( From cf8ec9585d8078d3f9084412de2659ce4af3d652 Mon Sep 17 00:00:00 2001 From: Trisfald Date: Tue, 1 Oct 2024 11:48:30 +0200 Subject: [PATCH 20/36] remove parent flat storage through the manager --- chain/chain/src/flat_storage_resharder.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index 19a3a1ced54..0d4b6404c00 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -497,7 +497,12 @@ fn split_shard_task_postprocessing(resharder: FlatStorageResharderInner, success let mut store_update = flat_store.store_update(); if success { // Split shard completed successfully. - // Parent flat storage can be deleted. + // Parent flat storage can be deleted from the FlatStoreManager. + resharder + .runtime + .get_flat_storage_manager() + .remove_flat_storage_for_shard(parent_shard, &mut store_update) + .unwrap(); store_update.remove_flat_storage(parent_shard); // Children must perform catchup. for child_shard in [left_child_shard, right_child_shard] { @@ -878,10 +883,18 @@ mod tests { // Controller should signal that resharding ended. assert_eq!(controller.completion_receiver.recv_timeout(Duration::from_secs(1)), Ok(true)); - // Check final status of children and parent flat storages. + // Check final status of parent flat storage. let parent = ShardUId { version: 3, shard_id: 1 }; assert_eq!(flat_store.get_flat_storage_status(parent), Ok(FlatStorageStatus::Empty)); assert_eq!(flat_store.iter(parent).count(), 0); + assert!(resharder + .inner + .runtime + .get_flat_storage_manager() + .get_flat_storage_for_shard(parent) + .is_none()); + + // Check final status of children flat storages. 
let last_hash = chain.head().unwrap().last_block_hash;
         assert_eq!(
             flat_store.get_flat_storage_status(left_child),
             Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp(last_hash)))
         );
         assert_eq!(
             flat_store.get_flat_storage_status(right_child),
             Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp(last_hash)))
         );
     }

From f45cc9e64f45a34af4025529593ae91678422639 Mon Sep 17 00:00:00 2001
From: Trisfald
Date: Tue, 1 Oct 2024 14:49:52 +0200
Subject: [PATCH 21/36] multi line imports

---
 chain/chain/src/flat_storage_resharder.rs | 37 ++++++++++++------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs
index 0d4b6404c00..d3cfcae5c2f 100644
--- a/chain/chain/src/flat_storage_resharder.rs
+++ b/chain/chain/src/flat_storage_resharder.rs
@@ -7,30 +7,25 @@ use std::sync::{Arc, Mutex};
 use crossbeam_channel::{Receiver, Sender};
 use near_chain_configs::ReshardingHandle;
 use near_chain_primitives::Error;
-use near_primitives::{
-    shard_layout::{account_id_to_shard_id, ShardLayout},
-    state::FlatStateValue,
-    trie_key::{
-        col::{self, ALL_COLUMNS_WITH_NAMES},
-        trie_key_parsers::{
-            parse_account_id_from_access_key_key, parse_account_id_from_account_key,
-            parse_account_id_from_contract_code_key, parse_account_id_from_contract_data_key,
-            parse_account_id_from_received_data_key, parse_account_id_from_trie_key_with_separator,
-        },
-    },
-    types::AccountId,
-};
-use near_store::{
-    adapter::{flat_store::FlatStoreUpdateAdapter, StoreAdapter},
-    flat::{
-        FlatStorageReadyStatus, FlatStorageReshardingStatus, FlatStorageStatus,
-        SplittingParentStatus,
-    },
-    ShardUId, StorageError,
-};
+
 use tracing::{debug, error, info, warn};

 use crate::types::RuntimeAdapter;
+use near_primitives::shard_layout::{account_id_to_shard_id, ShardLayout};
+use near_primitives::state::FlatStateValue;
+use near_primitives::trie_key::col::{self, ALL_COLUMNS_WITH_NAMES};
+use near_primitives::trie_key::trie_key_parsers::{
+    parse_account_id_from_access_key_key, parse_account_id_from_account_key,
+    parse_account_id_from_contract_code_key, parse_account_id_from_contract_data_key,
+    parse_account_id_from_received_data_key, parse_account_id_from_trie_key_with_separator,
+};
+use near_primitives::types::AccountId;
+use near_store::adapter::flat_store::FlatStoreUpdateAdapter;
+use near_store::adapter::StoreAdapter;
+use near_store::flat::{
+    FlatStorageReadyStatus, FlatStorageReshardingStatus, FlatStorageStatus, SplittingParentStatus,
+};
+use near_store::{ShardUId, StorageError};

 /// `FlatStorageResharder` takes care of updating flat storage when a resharding event
 /// happens.

From 3c188c53aac51a6b9a9213f8b3ea3f22349b15bb Mon Sep 17 00:00:00 2001
From: Trisfald
Date: Tue, 1 Oct 2024 15:19:25 +0200
Subject: [PATCH 22/36] apply code review suggestions

---
 chain/chain/src/flat_storage_resharder.rs | 60 +++++++++++++++--------
 1 file changed, 40 insertions(+), 20 deletions(-)

diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs
index d3cfcae5c2f..6434eeaa907 100644
--- a/chain/chain/src/flat_storage_resharder.rs
+++ b/chain/chain/src/flat_storage_resharder.rs
@@ -53,7 +53,7 @@ pub struct FlatStorageResharder {
 #[derive(Clone)]
 struct FlatStorageResharderInner {
     runtime: Arc<dyn RuntimeAdapter>,
-    resharding_event: Arc<Mutex<Option<FlatStorageReshardingEvent>>>,
+    resharding_event: Arc<Mutex<Option<FlatStorageReshardingEventStatus>>>,
 }

 impl FlatStorageResharder {
@@ -189,12 +189,12 @@ impl FlatStorageResharder {
         }
     }

-    fn set_resharding_event(&self, event: FlatStorageReshardingEvent) {
+    fn set_resharding_event(&self, event: FlatStorageReshardingEventStatus) {
         *self.inner.resharding_event.lock().unwrap() = Some(event);
     }

     /// Returns the current in-progress resharding event, if any.
-    pub fn resharding_event(&self) -> Option<FlatStorageReshardingEvent> {
+    pub fn resharding_event(&self) -> Option<FlatStorageReshardingEventStatus> {
         self.inner.resharding_event.lock().unwrap().clone()
     }

@@ -206,7 +206,7 @@ impl FlatStorageResharder {
         scheduler: &dyn FlatStorageResharderScheduler,
         controller: FlatStorageResharderController,
     ) {
-        let event = FlatStorageReshardingEvent::Split(parent_shard, status.clone());
+        let event = FlatStorageReshardingEventStatus::SplitShard(parent_shard, status.clone());
         self.set_resharding_event(event);
         info!(target: "resharding", ?parent_shard, ?status, "scheduling flat storage shard split");
@@ -336,7 +336,7 @@ fn get_parent_shard_and_status(
 ) -> (ShardUId, SplittingParentStatus) {
     let event = resharder.resharding_event.lock().unwrap();
     match event.as_ref() {
-        Some(FlatStorageReshardingEvent::Split(parent_shard, status)) => {
+        Some(FlatStorageReshardingEventStatus::SplitShard(parent_shard, status)) => {
             (*parent_shard, status.clone())
         }
         None => panic!("a resharding event must exist!"),
@@ -373,8 +373,10 @@ fn split_shard_task_impl(
         let mut iter_exhausted = false;
         for _ in 0..BATCH_SIZE {
             match iter.next() {
-                Some(Ok(kv)) => {
-                    if let Err(err) = shard_split_handle_key_value(kv, &mut store_update, &status) {
+                Some(Ok((key, value))) => {
+                    if let Err(err) =
+                        shard_split_handle_key_value(key, value, &mut store_update, &status)
+                    {
                         error!(target: "resharding", ?err, "failed to handle flat storage key");
                         return false;
                     }
@@ -410,11 +412,11 @@

 /// Handles the inheritance of a key-value pair from parent shard to children shards.
 fn shard_split_handle_key_value(
-    kv: (Vec<u8>, FlatStateValue),
+    key: Vec<u8>,
+    value: FlatStateValue,
     store_update: &mut FlatStoreUpdateAdapter,
     status: &SplittingParentStatus,
 ) -> Result<(), std::io::Error> {
-    let (key, value) = kv;
     if key.is_empty() {
         return Err(std::io::Error::new(
             std::io::ErrorKind::InvalidData,
             "flat storage key is empty",
         ));
     }
     let key_column_prefix = key[0];
@@ -435,7 +437,7 @@
             // Sanity check we are truly writing to one of the expected children shards.
             if new_shard_uid != *left_child_shard && new_shard_uid != *right_child_shard {
                 let err_msg = "account id doesn't map to any child shard!";
-                error!(target: "resharding", ?new_shard_uid, ?left_child_shard, ?right_child_shard, ?shard_layout, err_msg);
+                error!(target: "resharding", ?new_shard_uid, ?left_child_shard, ?right_child_shard, ?shard_layout, ?account_id, err_msg);
                 return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, err_msg));
             }
             // Add the new flat store entry.
@@ -470,12 +472,30 @@
-        col::DELAYED_RECEIPT_OR_INDICES => todo!(),
-        col::PROMISE_YIELD_INDICES => todo!(),
-        col::PROMISE_YIELD_TIMEOUT => todo!(),
-        col::PROMISE_YIELD_RECEIPT => todo!(),
-        col::BUFFERED_RECEIPT_INDICES => todo!(),
-        col::BUFFERED_RECEIPT => todo!(),
+        col::DELAYED_RECEIPT_OR_INDICES => {
+            // TODO(trisfald): implement logic and remove error log
+            error!(target: "resharding", "flat storage resharding of col::DELAYED_RECEIPT_OR_INDICES is not implemented yet!");
+        }
+        col::PROMISE_YIELD_INDICES => {
+            // TODO(trisfald): implement logic and remove error log
+            error!(target: "resharding", "flat storage resharding of col::PROMISE_YIELD_INDICES is not implemented yet!");
+        }
+        col::PROMISE_YIELD_TIMEOUT => {
+            // TODO(trisfald): implement logic and remove error log
+            error!(target: "resharding", "flat storage resharding of col::PROMISE_YIELD_TIMEOUT is not implemented yet!");
+        }
+        col::PROMISE_YIELD_RECEIPT => {
+            // TODO(trisfald): implement logic and remove error log
+            error!(target: "resharding", "flat storage resharding of col::PROMISE_YIELD_RECEIPT is not implemented yet!");
+        }
+        col::BUFFERED_RECEIPT_INDICES => {
+            // TODO(trisfald): implement logic and remove error log
+            error!(target: "resharding", "flat storage resharding of col::BUFFERED_RECEIPT_INDICES is not implemented yet!");
+        }
+        col::BUFFERED_RECEIPT => {
+            // TODO(trisfald): implement logic and remove error log
+            error!(target: "resharding", "flat storage resharding of col::BUFFERED_RECEIPT is not implemented yet!");
+        }
         _ => unreachable!(),
     }
     Ok(())
@@ -487,7 +507,7 @@ fn split_shard_task_postprocessing(resharder: FlatStorageResharderInner, success
     let (parent_shard, status) = get_parent_shard_and_status(&resharder);
     let SplittingParentStatus { left_child_shard, right_child_shard, flat_head, .. } = status;
     let flat_store = resharder.runtime.store().flat_store();
-    info!(target: "resharding", ?parent_shard, "flat storage shard split task: post-processing");
+    info!(target: "resharding", ?parent_shard, ?success, ?status, "flat storage shard split task: post-processing");

     let mut store_update = flat_store.store_update();

 /// Struct to describe, perform and track progress of a flat storage resharding.
 #[derive(Clone, Debug)]
-pub enum FlatStorageReshardingEvent {
+pub enum FlatStorageReshardingEventStatus {
     /// Split a shard.
     /// Includes the parent shard uid and the operation's status.
-    Split(ShardUId, SplittingParentStatus),
+    SplitShard(ShardUId, SplittingParentStatus),
 }

 /// Helps control the flat storage resharder operation.
More specifically, @@ -753,7 +773,7 @@ mod tests { let resharding_event = resharder.resharding_event(); match resharding_event.unwrap() { - FlatStorageReshardingEvent::Split(parent, status) => { + FlatStorageReshardingEventStatus::SplitShard(parent, status) => { assert_eq!( flat_store.get_flat_storage_status(parent), Ok(FlatStorageStatus::Resharding( From ac6d642c98cfcdfd113a43db97e00d1f5314933d Mon Sep 17 00:00:00 2001 From: Trisfald Date: Tue, 1 Oct 2024 15:51:44 +0200 Subject: [PATCH 23/36] add note about parent shard flat storage invariant --- chain/chain/src/flat_storage_resharder.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index 6434eeaa907..906255ab6c2 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -360,7 +360,12 @@ fn split_shard_task_impl( let (parent_shard, status) = get_parent_shard_and_status(&resharder); - info!(target: "resharding", ?parent_shard, "flat storage shard split task: starting key-values copy"); + let parent_flat_head = status.flat_head; + info!(target: "resharding", ?parent_shard, ?parent_flat_head, "flat storage shard split task: starting key-values copy"); + + // Parent shard flat storage must be on the same height as the chain head. This guarantees that all + // deltas have been applied and thus the state of all key-values is up to date. + // TODO(trisfald): do this check, maybe call update_flat_storage_for_shard // Prepare the store object for commits and the iterator over parent's flat storage. let flat_store = resharder.runtime.store().flat_store(); From 9269c373e62969aec69d5d53a01016a4a4db93fe Mon Sep 17 00:00:00 2001 From: Trisfald Date: Tue, 1 Oct 2024 17:18:36 +0200 Subject: [PATCH 24/36] add test reject_split_shard_if_parent_is_not_ready --- chain/chain/src/flat_storage_resharder.rs | 42 +++++++++++++++-------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index 906255ab6c2..4865533d7db 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -872,20 +872,13 @@ mod tests { let scheduler = TestScheduler {}; let controller = FlatStorageResharderController::new(); - let result = resharder.split_shard( - ReshardingSplitParams { - parent_shard: ShardUId { version: 3, shard_id: 1 }, - left_child_shard: ShardUId { version: 3, shard_id: 2 }, - right_child_shard: ShardUId { version: 3, shard_id: 3 }, - }, - &new_shard_layout, - &scheduler, - controller.clone(), - ); - // TODO(Trisfald): replace the above with this simple call - // let result = - // resharder.start_resharding_from_new_shard_layout(&new_shard_layout, &scheduler); - assert!(result.is_ok()); + assert!(resharder + .start_resharding_from_new_shard_layout( + &new_shard_layout, + &scheduler, + controller.clone() + ) + .is_ok()); // Check flat storages of children contain the correct accounts. let left_child = ShardUId { version: 3, shard_id: 2 }; @@ -966,4 +959,25 @@ mod tests { assert_eq!(flat_store.iter(child_shard).count(), 0); } } + + /// A shard can't be split if it isn't in ready state. 
+ #[test] + fn reject_split_shard_if_parent_is_not_ready() { + let (_, resharder) = create_fs_resharder(simple_shard_layout()); + let new_shard_layout = shard_layout_after_split(); + let scheduler = TestScheduler {}; + let controller = FlatStorageResharderController::new(); + + // Make flat storage of parent shard not ready. + let parent_shard = ShardUId { version: 3, shard_id: 1 }; + let flat_store = resharder.inner.runtime.store().flat_store(); + let mut store_update = flat_store.store_update(); + store_update.set_flat_storage_status(parent_shard, FlatStorageStatus::Empty); + store_update.commit().unwrap(); + + // Trigger resharding and it should fail. + assert!(resharder + .start_resharding_from_new_shard_layout(&new_shard_layout, &scheduler, controller) + .is_err()); + } } From 7ad602e2729f08ae8f42cdb5e74bb93b4c179222 Mon Sep 17 00:00:00 2001 From: Trisfald Date: Tue, 1 Oct 2024 17:29:43 +0200 Subject: [PATCH 25/36] use a dedicated type to represent task statuses --- chain/chain/src/flat_storage_resharder.rs | 112 +++++++++++++--------- 1 file changed, 66 insertions(+), 46 deletions(-) diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index 4865533d7db..7557bd19bc4 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -321,10 +321,10 @@ fn split_shard_task( resharder: FlatStorageResharderInner, controller: FlatStorageResharderController, ) { - let success = split_shard_task_impl(resharder.clone(), controller.clone()); - split_shard_task_postprocessing(resharder, success); - info!(target: "resharding", "flat storage shard split task finished, success: {success}"); - if let Err(err) = controller.completion_sender.send(success) { + let task_status = split_shard_task_impl(resharder.clone(), controller.clone()); + split_shard_task_postprocessing(resharder, task_status); + info!(target: "resharding", ?task_status, "flat storage shard split task finished"); + if let Err(err) = controller.completion_sender.send(task_status) { warn!(target: "resharding", ?err, "error notifying completion of flat storage shard split task") }; } @@ -349,9 +349,9 @@ fn get_parent_shard_and_status( fn split_shard_task_impl( resharder: FlatStorageResharderInner, controller: FlatStorageResharderController, -) -> bool { +) -> FlatStorageReshardingTaskStatus { if controller.is_interrupted() { - return false; + return FlatStorageReshardingTaskStatus::Interrupted; } /// Determines after how many key-values the process stops to @@ -383,12 +383,12 @@ fn split_shard_task_impl( shard_split_handle_key_value(key, value, &mut store_update, &status) { error!(target: "resharding", ?err, "failed to handle flat storage key"); - return false; + return FlatStorageReshardingTaskStatus::Failed; } } Some(Err(err)) => { error!(target: "resharding", ?err, "failed to read flat storage value from parent shard"); - return false; + return FlatStorageReshardingTaskStatus::Failed; } None => { iter_exhausted = true; @@ -399,7 +399,7 @@ fn split_shard_task_impl( // Make a pause to commit and check if the routine should stop. 
if let Err(err) = store_update.commit() {
             error!(target: "resharding", ?err, "failed to commit store update");
-            return false;
+            return FlatStorageReshardingTaskStatus::Failed;
         }

         // TODO(Trisfald): metrics and logs
             break;
         }
         if controller.is_interrupted() {
-            return false;
+            return FlatStorageReshardingTaskStatus::Interrupted;
         }
     }
-    true
+    FlatStorageReshardingTaskStatus::Successful
 }

 /// Handles the inheritance of a key-value pair from parent shard to children shards.

 /// Performs post-processing of shard splitting after all key-values have been moved from parent to children.
-/// `success` indicates whether or not the previous phase was successful.
-fn split_shard_task_postprocessing(resharder: FlatStorageResharderInner, success: bool) {
-    let (parent_shard, status) = get_parent_shard_and_status(&resharder);
-    let SplittingParentStatus { left_child_shard, right_child_shard, flat_head, .. } = status;
+/// `task_status` indicates whether the copy phase succeeded, failed, or was interrupted.
+fn split_shard_task_postprocessing(
+    resharder: FlatStorageResharderInner,
+    task_status: FlatStorageReshardingTaskStatus,
+) {
+    let (parent_shard, split_status) = get_parent_shard_and_status(&resharder);
+    let SplittingParentStatus { left_child_shard, right_child_shard, flat_head, .. } = split_status;
     let flat_store = resharder.runtime.store().flat_store();
-    info!(target: "resharding", ?parent_shard, ?success, ?status, "flat storage shard split task: post-processing");
+    info!(target: "resharding", ?parent_shard, ?task_status, ?split_status, "flat storage shard split task: post-processing");

     let mut store_update = flat_store.store_update();
-    if success {
-        // Split shard completed successfully.
-        // Parent flat storage can be deleted from the FlatStoreManager.
-        resharder
-            .runtime
-            .get_flat_storage_manager()
-            .remove_flat_storage_for_shard(parent_shard, &mut store_update)
-            .unwrap();
-        store_update.remove_flat_storage(parent_shard);
-        // Children must perform catchup.
-        for child_shard in [left_child_shard, right_child_shard] {
+    match task_status {
+        FlatStorageReshardingTaskStatus::Successful => {
+            // Split shard completed successfully.
+            // Parent flat storage can be deleted from the FlatStoreManager.
+            resharder
+                .runtime
+                .get_flat_storage_manager()
+                .remove_flat_storage_for_shard(parent_shard, &mut store_update)
+                .unwrap();
+            store_update.remove_flat_storage(parent_shard);
+            // Children must perform catchup.
+            for child_shard in [left_child_shard, right_child_shard] {
+                store_update.set_flat_storage_status(
+                    child_shard,
+                    FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp(
+                        flat_head.hash,
+                    )),
+                );
+            }
+            // TODO(trisfald): trigger catchup
+        }
+        FlatStorageReshardingTaskStatus::Failed | FlatStorageReshardingTaskStatus::Interrupted => {
+            // We got an error or an interrupt request.
+            // Reset parent.
             store_update.set_flat_storage_status(
-                child_shard,
-                FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp(
-                    flat_head.hash,
-                )),
+                parent_shard,
+                FlatStorageStatus::Ready(FlatStorageReadyStatus { flat_head }),
             );
-        }
-        // TODO(trisfald): trigger catchup
-    } else {
-        // We got an error or an interrupt request.
-        // Reset parent.
-        store_update.set_flat_storage_status(
-            parent_shard,
-            FlatStorageStatus::Ready(FlatStorageReadyStatus { flat_head }),
-        );
-        // Remove children shards leftovers.
-        for child_shard in [left_child_shard, right_child_shard] {
-            store_update.remove_flat_storage(child_shard);
+            // Remove children shards leftovers.
+            for child_shard in [left_child_shard, right_child_shard] {
+                store_update.remove_flat_storage(child_shard);
+            }
         }
     }
     store_update.commit().unwrap();

 /// Struct to describe, perform and track progress of a flat storage resharding.
 #[derive(Clone, Debug)]
 pub enum FlatStorageReshardingEventStatus {
     /// Split a shard.
     /// Includes the parent shard uid and the operation's status.
     SplitShard(ShardUId, SplittingParentStatus),
 }

+/// Status of a flat storage resharding task.
+#[derive(Clone, Debug, Copy, Eq, PartialEq)]
+pub enum FlatStorageReshardingTaskStatus {
+    Successful,
+    Failed,
+    Interrupted,
+}
+
 /// Helps control the flat storage resharder operation. More specifically,
 /// it has a way to know when the background task is done or to interrupt it.
 #[derive(Clone)]
 pub struct FlatStorageResharderController {
     handle: ReshardingHandle,
     /// This object will be used to signal when the background task is completed.
-    /// A value of `true` means that the operation completed successfully.
-    completion_sender: Sender<bool>,
+    /// The sent value is the final status of the task.
+    completion_sender: Sender<FlatStorageReshardingTaskStatus>,
     /// Corresponding receiver for `completion_sender`.
-    pub completion_receiver: Receiver<bool>,
+    pub completion_receiver: Receiver<FlatStorageReshardingTaskStatus>,
 }

 impl FlatStorageResharderController {
         .is_ok_and(|val| val.is_some()));

         // Controller should signal that resharding ended.
-        assert_eq!(controller.completion_receiver.recv_timeout(Duration::from_secs(1)), Ok(true));
+        assert_eq!(
+            controller.completion_receiver.recv_timeout(Duration::from_secs(1)),
+            Ok(FlatStorageReshardingTaskStatus::Successful)
+        );

         // Check final status of parent flat storage.
         // Check that resharding was effectively interrupted.
         let flat_store = resharder.inner.runtime.store().flat_store();
-        assert_eq!(controller.completion_receiver.recv_timeout(Duration::from_secs(1)), Ok(false));
+        assert_eq!(
+            controller.completion_receiver.recv_timeout(Duration::from_secs(1)),
+            Ok(FlatStorageReshardingTaskStatus::Interrupted)
+        );

From df869fc45dab7b29da823daaab18b9f5f3f91151 Mon Sep 17 00:00:00 2001
From: Trisfald
Date: Tue, 1 Oct 2024 17:45:30 +0200
Subject: [PATCH 26/36] refactor copy_kv_to_child into a free function

---
 chain/chain/src/flat_storage_resharder.rs | 137 +++++++++++-----------
 1 file changed, 71 insertions(+), 66 deletions(-)

diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs
index 7557bd19bc4..eaeffea7cc6 100644
--- a/chain/chain/src/flat_storage_resharder.rs
+++ b/chain/chain/src/flat_storage_resharder.rs
@@ -429,77 +429,57 @@ fn shard_split_handle_key_value(
         ));
     }
     let key_column_prefix = key[0];
-    let SplittingParentStatus { left_child_shard, right_child_shard, shard_layout, .. } = status;
-
-    // Copies a key value pair to the correct child by matching the account id to the new shard.
-    let copy_kv_to_child =
-        |account_id_parser: fn(&[u8]) -> Result<AccountId, std::io::Error>| -> Result<(), std::io::Error> {
-            // Derive the shard uid for this account in the new shard layout.
-            let account_id = account_id_parser(&key)?;
-            let new_shard_id = account_id_to_shard_id(&account_id, shard_layout);
-            let new_shard_uid = ShardUId::from_shard_id_and_layout(new_shard_id, &shard_layout);
-
-            // Sanity check we are truly writing to one of the expected children shards.
- if new_shard_uid != *left_child_shard && new_shard_uid != *right_child_shard { - let err_msg = "account id doesn't map to any child shard!"; - error!(target: "resharding", ?new_shard_uid, ?left_child_shard, ?right_child_shard, ?shard_layout, ?account_id, err_msg); - return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, err_msg)); - } - // Add the new flat store entry. - store_update.set(new_shard_uid, key, Some(value)); - Ok(()) - }; match key_column_prefix { - col::ACCOUNT => copy_kv_to_child(parse_account_id_from_account_key)?, - col::CONTRACT_DATA => copy_kv_to_child(parse_account_id_from_contract_data_key)?, - col::CONTRACT_CODE => copy_kv_to_child(parse_account_id_from_contract_code_key)?, - col::ACCESS_KEY => copy_kv_to_child(parse_account_id_from_access_key_key)?, - col::RECEIVED_DATA => copy_kv_to_child(parse_account_id_from_received_data_key)?, - col::POSTPONED_RECEIPT_ID => copy_kv_to_child(|raw_key: &[u8]| { - parse_account_id_from_trie_key_with_separator( - col::POSTPONED_RECEIPT_ID, - raw_key, - ALL_COLUMNS_WITH_NAMES[col::POSTPONED_RECEIPT_ID as usize].1, - ) - })?, - col::PENDING_DATA_COUNT => copy_kv_to_child(|raw_key: &[u8]| { - parse_account_id_from_trie_key_with_separator( - col::PENDING_DATA_COUNT, - raw_key, - ALL_COLUMNS_WITH_NAMES[col::PENDING_DATA_COUNT as usize].1, - ) - })?, - col::POSTPONED_RECEIPT => copy_kv_to_child(|raw_key: &[u8]| { - parse_account_id_from_trie_key_with_separator( - col::POSTPONED_RECEIPT, - raw_key, - ALL_COLUMNS_WITH_NAMES[col::POSTPONED_RECEIPT as usize].1, - ) - })?, - col::DELAYED_RECEIPT_OR_INDICES => { - // TODO(trisfald): implement logic and remove error log - error!(target: "resharding", "flat storage resharding of col::DELAYED_RECEIPT_OR_INDICES is not implemented yet!"); - } - col::PROMISE_YIELD_INDICES => { - // TODO(trisfald): implement logic and remove error log - error!(target: "resharding", "flat storage resharding of col::PROMISE_YIELD_INDICES is not implemented yet!"); - } - col::PROMISE_YIELD_TIMEOUT => { - // TODO(trisfald): implement logic and remove error log - error!(target: "resharding", "flat storage resharding of col::PROMISE_YIELD_TIMEOUT is not implemented yet!"); + col::ACCOUNT => { + copy_kv_to_child(&status, key, value, store_update, parse_account_id_from_account_key)? } - col::PROMISE_YIELD_RECEIPT => { - // TODO(trisfald): implement logic and remove error log - error!(target: "resharding", "flat storage resharding of col::PROMISE_YIELD_RECEIPT is not implemented yet!"); - } - col::BUFFERED_RECEIPT_INDICES => { - // TODO(trisfald): implement logic and remove error log - error!(target: "resharding", "flat storage resharding of col::BUFFERED_RECEIPT_INDICES is not implemented yet!"); + col::CONTRACT_DATA => copy_kv_to_child( + &status, + key, + value, + store_update, + parse_account_id_from_contract_data_key, + )?, + col::CONTRACT_CODE => copy_kv_to_child( + &status, + key, + value, + store_update, + parse_account_id_from_contract_code_key, + )?, + col::ACCESS_KEY => copy_kv_to_child( + &status, + key, + value, + store_update, + parse_account_id_from_access_key_key, + )?, + col::RECEIVED_DATA => copy_kv_to_child( + &status, + key, + value, + store_update, + parse_account_id_from_received_data_key, + )?, + col::POSTPONED_RECEIPT_ID | col::PENDING_DATA_COUNT | col::POSTPONED_RECEIPT => { + copy_kv_to_child(&status, key, value, store_update, |raw_key: &[u8]| { + parse_account_id_from_trie_key_with_separator( + key_column_prefix, + raw_key, + ALL_COLUMNS_WITH_NAMES[key_column_prefix as usize].1, + ) + })? 
}
+        col::DELAYED_RECEIPT_OR_INDICES
+        | col::PROMISE_YIELD_INDICES
+        | col::PROMISE_YIELD_TIMEOUT
+        | col::PROMISE_YIELD_RECEIPT
+        | col::BUFFERED_RECEIPT_INDICES
+        | col::BUFFERED_RECEIPT => {
+            // TODO(trisfald): implement logic and remove error log
+            let col_name = ALL_COLUMNS_WITH_NAMES[key_column_prefix as usize].1;
+            error!(target: "resharding", "flat storage resharding of {col_name} is not implemented yet!");
+        }
         _ => unreachable!(),
     }
     Ok(())

+/// Copies a key-value pair to the correct child shard by matching the account-id to the provided shard layout.
+fn copy_kv_to_child(
+    status: &SplittingParentStatus,
+    key: Vec<u8>,
+    value: FlatStateValue,
+    store_update: &mut FlatStoreUpdateAdapter,
+    account_id_parser: impl FnOnce(&[u8]) -> Result<AccountId, std::io::Error>,
+) -> Result<(), std::io::Error> {
+    let SplittingParentStatus { left_child_shard, right_child_shard, shard_layout, .. } = &status;
+    // Derive the shard uid for this account in the new shard layout.
+    let account_id = account_id_parser(&key)?;
+    let new_shard_id = account_id_to_shard_id(&account_id, shard_layout);
+    let new_shard_uid = ShardUId::from_shard_id_and_layout(new_shard_id, &shard_layout);
+
+    // Sanity check we are truly writing to one of the expected children shards.
+    if new_shard_uid != *left_child_shard && new_shard_uid != *right_child_shard {
+        let err_msg = "account id doesn't map to any child shard!";
+        error!(target: "resharding", ?new_shard_uid, ?left_child_shard, ?right_child_shard, ?shard_layout, ?account_id, err_msg);
+        return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, err_msg));
+    }
+    // Add the new flat store entry.
+    store_update.set(new_shard_uid, key, Some(value));
+    Ok(())
+}
+
 /// Struct to describe, perform and track progress of a flat storage resharding.
#[derive(Clone, Debug)]
 pub enum FlatStorageReshardingEventStatus {

From 7ad602e2729f08ae8f42cdb5e74bb93b4c179222 Mon Sep 17 00:00:00 2001
From: Trisfald
Date: Wed, 2 Oct 2024 11:39:02 +0200
Subject: [PATCH 27/36] address code review comments

---
 chain/chain/src/flat_storage_resharder.rs | 13 +++++--------
 core/store/src/adapter/flat_store.rs | 5 +++--
 core/store/src/flat/storage.rs | 2 +-
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs
index eaeffea7cc6..7131f0dab11 100644
--- a/chain/chain/src/flat_storage_resharder.rs
+++ b/chain/chain/src/flat_storage_resharder.rs
@@ -222,7 +222,7 @@ impl FlatStorageResharder {
         let mut store_update = self.inner.runtime.store().flat_store().store_update();
         for child in [left_child_shard, right_child_shard] {
             store_update.remove_all_deltas(*child);
-            store_update.remove_all(*child);
+            store_update.remove_all_values(*child);
         }
         store_update.commit()?;
         Ok(())
@@ -421,12 +421,9 @@ fn shard_split_handle_key_value(
     value: FlatStateValue,
     store_update: &mut FlatStoreUpdateAdapter,
     status: &SplittingParentStatus,
-) -> Result<(), std::io::Error> {
+) -> Result<(), Error> {
     if key.is_empty() {
-        return Err(std::io::Error::new(
-            std::io::ErrorKind::InvalidData,
-            "flat storage key is empty",
-        ));
+        panic!("flat storage key is empty!")
     }
     let key_column_prefix = key[0];

@@ -544,7 +541,7 @@ fn copy_kv_to_child(
     value: FlatStateValue,
     store_update: &mut FlatStoreUpdateAdapter,
     account_id_parser: impl FnOnce(&[u8]) -> Result<AccountId, std::io::Error>,
-) -> Result<(), std::io::Error> {
+) -> Result<(), Error> {
     let SplittingParentStatus { left_child_shard, right_child_shard, shard_layout, .. } = &status;
     // Derive the shard uid for this account in the new shard layout.
     let account_id = account_id_parser(&key)?;
@@ -555,7 +552,7 @@ fn copy_kv_to_child(
     if new_shard_uid != *left_child_shard && new_shard_uid != *right_child_shard {
         let err_msg = "account id doesn't map to any child shard!";
         error!(target: "resharding", ?new_shard_uid, ?left_child_shard, ?right_child_shard, ?shard_layout, ?account_id, err_msg);
-        return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, err_msg));
+        return Err(Error::ReshardingError(err_msg.to_string()));
     }
     // Add the new flat store entry.
     store_update.set(new_shard_uid, key, Some(value));

diff --git a/core/store/src/adapter/flat_store.rs b/core/store/src/adapter/flat_store.rs
index 43977dd1baf..322f4dd09cc 100644
--- a/core/store/src/adapter/flat_store.rs
+++ b/core/store/src/adapter/flat_store.rs
@@ -234,7 +234,7 @@ impl<'a> FlatStoreUpdateAdapter<'a> {
         }
     }

-    pub fn remove_all(&mut self, shard_uid: ShardUId) {
+    pub fn remove_all_values(&mut self, shard_uid: ShardUId) {
         self.remove_range_by_shard_uid(shard_uid, DBCol::FlatState);
     }

@@ -270,6 +270,7 @@ impl<'a> FlatStoreUpdateAdapter<'a> {
         self.remove_range_by_shard_uid(shard_uid, DBCol::FlatStateDeltaMetadata);
     }

+    /// Removes flat storage in its entirety for a shard: deltas, values and status.
pub fn remove_flat_storage(&mut self, shard_uid: ShardUId) { self.remove_all_deltas(shard_uid); - self.remove_all(shard_uid); + self.remove_all_values(shard_uid); self.remove_status(shard_uid); } diff --git a/core/store/src/flat/storage.rs b/core/store/src/flat/storage.rs index 172928d35eb..cf01fe421a4 100644 --- a/core/store/src/flat/storage.rs +++ b/core/store/src/flat/storage.rs @@ -478,7 +478,7 @@ impl FlatStorage { ) -> Result<(), StorageError> { let guard = self.0.write().expect(super::POISONED_LOCK_ERR); let shard_uid = guard.shard_uid; - store_update.remove_all(shard_uid); + store_update.remove_all_values(shard_uid); store_update.remove_all_deltas(shard_uid); store_update.set_flat_storage_status(shard_uid, FlatStorageStatus::Empty); guard.update_delta_metrics(); From e1f3b5d5876bf0a8672d02f65fe28fa61c49e729 Mon Sep 17 00:00:00 2001 From: Trisfald Date: Wed, 2 Oct 2024 17:48:45 +0200 Subject: [PATCH 28/36] refactor ReshardingEventType --- chain/chain/src/flat_storage_resharder.rs | 253 +++++++++------------- chain/chain/src/resharding/event_type.rs | 184 ++++++++++++++++ chain/chain/src/resharding/manager.rs | 21 +- chain/chain/src/resharding/mod.rs | 1 + core/primitives/src/shard_layout.rs | 8 + core/store/src/flat/types.rs | 4 + 6 files changed, 307 insertions(+), 164 deletions(-) create mode 100644 chain/chain/src/resharding/event_type.rs diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index 7131f0dab11..0fce50e73ab 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -10,6 +10,7 @@ use near_chain_primitives::Error; use tracing::{debug, error, info, warn}; +use crate::resharding::event_type::{ReshardingEventType, ReshardingSplitShardParams}; use crate::types::RuntimeAdapter; use near_primitives::shard_layout::{account_id_to_shard_id, ShardLayout}; use near_primitives::state::FlatStateValue; @@ -101,39 +102,46 @@ impl FlatStorageResharder { Ok(()) } - /// Starts a resharding event deduced from the new shard layout provided. + /// Starts a resharding event. /// /// For now, only splitting a shard is supported. /// /// # Args: - /// * `shard_layout`: the new shard layout, it must contain a layout change or an error is returned + /// * `event_type`: the type of resharding event + /// * `shard_layout`: the new shard layout /// * `scheduler`: component used to schedule the background tasks /// * `controller`: manages the execution of the background tasks - pub fn start_resharding_from_new_shard_layout( + pub fn start_resharding( &self, + event_type: ReshardingEventType, shard_layout: &ShardLayout, scheduler: &dyn FlatStorageResharderScheduler, controller: FlatStorageResharderController, ) -> Result<(), Error> { - match event_type_from_shard_layout(&shard_layout)? { - ReshardingEventType::Split(params) => { - self.split_shard(params, shard_layout, scheduler, controller)? + match event_type { + ReshardingEventType::SplitShard(params) => { + self.split_shard(params, shard_layout, scheduler, controller) } - }; - Ok(()) + } } /// Starts the event of splitting a parent shard flat storage into two children. 
     fn split_shard(
         &self,
-        split_params: ReshardingSplitParams,
+        split_params: ReshardingSplitShardParams,
         shard_layout: &ShardLayout,
         scheduler: &dyn FlatStorageResharderScheduler,
         controller: FlatStorageResharderController,
     ) -> Result<(), Error> {
-        let ReshardingSplitParams { parent_shard, left_child_shard, right_child_shard } =
-            split_params;
-        info!(target: "resharding", ?parent_shard, ?left_child_shard, ?right_child_shard, "initiating flat storage shard split");
+        let ReshardingSplitShardParams {
+            parent_shard,
+            left_child_shard,
+            right_child_shard,
+            block_hash,
+            prev_block_hash,
+            ..
+        } = split_params;
+        info!(target: "resharding", ?split_params, "initiating flat storage shard split");
         self.check_no_resharding_in_progress()?;

         // Parent shard must be in ready state.
@@ -156,6 +164,8 @@ impl FlatStorageResharder {
             left_child_shard,
             right_child_shard,
             shard_layout: shard_layout.clone(),
+            block_hash,
+            prev_block_hash,
             flat_head,
         };
         store_update.set_flat_storage_status(
@@ -229,91 +239,6 @@ impl FlatStorageResharder {
     }
 }

-/// Struct used to destructure a new shard layout definition into the resulting resharding event.
-#[derive(Debug)]
-#[cfg_attr(test, derive(PartialEq, Eq))]
-enum ReshardingEventType {
-    /// Split of a shard.
-    Split(ReshardingSplitParams),
-}
-
-#[derive(Debug)]
-#[cfg_attr(test, derive(PartialEq, Eq))]
-struct ReshardingSplitParams {
-    // Shard being split.
-    parent_shard: ShardUId,
-    // Child to the left of the account boundary.
-    left_child_shard: ShardUId,
-    // Child to the right of the account boundary.
-    right_child_shard: ShardUId,
-}
-
-/// Takes as input a [ShardLayout] definition and deduces which kind of resharding operation must be
-/// performed.
-///
-/// Returns an error if there isn't any change in the shard layout that would require resharding.
-fn event_type_from_shard_layout(shard_layout: &ShardLayout) -> Result<ReshardingEventType, Error> {
-    // Resharding V3 supports shard layout V2 onwards.
-    match shard_layout {
-        ShardLayout::V0(_) | ShardLayout::V1(_) => {
-            let err_msg = "unsupported shard layout!";
-            error!(target: "resharding", ?shard_layout, err_msg);
-            return Err(Error::ReshardingError(err_msg.to_owned()));
-        }
-        ShardLayout::V2(_) => {
-            // Supported.
-        }
-    }
-
-    let mut event = None;
-
-    let error_two_reshardings = || {
-        let err_msg = "can't perform two reshardings at the same time!";
-        error!(target: "resharding", ?shard_layout, err_msg);
-        return Err(Error::ReshardingError(err_msg.to_owned()));
-    };
-
-    for shard_id in shard_layout.shard_ids() {
-        // Look for a shard having exactly two children, to detect a split.
-        // - retrieve the parent shard
-        // - if the parent has two children create the split event
-        let parent_shard_id = shard_layout.get_parent_shard_id(shard_id)?;
-        if let Some(children) = shard_layout.get_children_shards_uids(parent_shard_id) {
-            if children.len() == 2 {
-                match &event {
-                    None => {
-                        let parent_shard = ShardUId {
-                            version: shard_layout.version(),
-                            shard_id: parent_shard_id as u32,
-                        };
-                        let left_child_shard = children[0];
-                        let right_child_shard = children[1];
-                        event = Some(ReshardingEventType::Split(ReshardingSplitParams {
-                            parent_shard,
-                            left_child_shard,
-                            right_child_shard,
-                        }))
-                    }
-                    Some(ReshardingEventType::Split(split)) => {
-                        // It's fine only if this shard is already a child of the existing event.
- if split.left_child_shard.shard_id() != shard_id - && split.right_child_shard.shard_id() != shard_id - { - return error_two_reshardings(); - } - } - } - } - } - } - // We must have found at least one resharding event by now. - event.ok_or_else(|| { - let err_msg = "no supported shard layout change found!"; - error!(target: "resharding", ?shard_layout, err_msg); - Error::ReshardingError(err_msg.to_owned()) - }) -} - /// Task to perform the actual split of a flat storage shard. This may be a long operation time-wise. /// /// Conceptually it simply copies each key-value pair from the parent shard to the correct child. @@ -359,13 +284,12 @@ fn split_shard_task_impl( const BATCH_SIZE: usize = 10_000; let (parent_shard, status) = get_parent_shard_and_status(&resharder); + info!(target: "resharding", ?parent_shard, ?status, "flat storage shard split task: starting key-values copy"); - let parent_flat_head = status.flat_head; - info!(target: "resharding", ?parent_shard, ?parent_flat_head, "flat storage shard split task: starting key-values copy"); - - // Parent shard flat storage must be on the same height as the chain head. This guarantees that all - // deltas have been applied and thus the state of all key-values is up to date. + // Parent shard flat storage head must be on block height just before the new shard layout kicks in. + // This guarantees that all deltas have been applied and thus the state of all key-values is up to date. // TODO(trisfald): do this check, maybe call update_flat_storage_for_shard + let _parent_flat_head = status.flat_head; // Prepare the store object for commits and the iterator over parent's flat storage. let flat_store = resharder.runtime.store().flat_store(); @@ -716,53 +640,28 @@ mod tests { MutableConfigValue::new(None, "validator_signer"), ) .unwrap(); - (chain, FlatStorageResharder::new(runtime)) } - /// Verify that the correct type of resharding is deduced from a new shard layout. - #[test] - fn parse_event_type_from_shard_layout() { - // No resharding is not ok. - assert!(event_type_from_shard_layout(&simple_shard_layout()).is_err()); - - // Shard layouts V0 and V1 are rejected. - assert!(event_type_from_shard_layout(&ShardLayout::v0_single_shard()).is_err()); - assert!(event_type_from_shard_layout(&ShardLayout::v1_test()).is_err()); - - // Single split shard is ok. - let layout = shard_layout_after_split(); - let event_type = event_type_from_shard_layout(&layout).unwrap(); - assert_eq!( - event_type, - ReshardingEventType::Split(ReshardingSplitParams { - parent_shard: ShardUId { version: 3, shard_id: 1 }, - left_child_shard: ShardUId { version: 3, shard_id: 2 }, - right_child_shard: ShardUId { version: 3, shard_id: 3 } - }) - ); - - // Double split shard is not ok. - let shards_split_map = BTreeMap::from([(0, vec![2, 3]), (1, vec![4, 5])]); - let layout = ShardLayout::v2( - vec![account!("ff"), account!("pp"), account!("ss")], - vec![2, 3, 4, 5], - Some(shards_split_map), - ); - assert!(event_type_from_shard_layout(&layout).is_err()); - } - /// Verify that another resharding can't be triggered if one is ongoing. 
#[test] fn concurrent_reshardings_are_disallowed() { init_test_logger(); - let (_, resharder) = create_fs_resharder(simple_shard_layout()); + let (chain, resharder) = create_fs_resharder(simple_shard_layout()); let new_shard_layout = shard_layout_after_split(); let scheduler = DelayedScheduler::default(); let controller = FlatStorageResharderController::new(); + let resharding_event_type = ReshardingEventType::from_shard_layout( + &new_shard_layout, + chain.head().unwrap().last_block_hash, + chain.head().unwrap().prev_block_hash, + ) + .unwrap() + .unwrap(); assert!(resharder - .start_resharding_from_new_shard_layout( + .start_resharding( + resharding_event_type.clone(), &new_shard_layout, &scheduler, controller.clone() @@ -774,7 +673,7 @@ mod tests { assert!(resharder.resharding_event().is_some()); assert!(resharder - .start_resharding_from_new_shard_layout(&new_shard_layout, &scheduler, controller) + .start_resharding(resharding_event_type, &new_shard_layout, &scheduler, controller) .is_err()); } @@ -782,14 +681,21 @@ mod tests { #[test] fn flat_storage_split_status_set() { init_test_logger(); - let (_, resharder) = create_fs_resharder(simple_shard_layout()); + let (chain, resharder) = create_fs_resharder(simple_shard_layout()); let new_shard_layout = shard_layout_after_split(); let scheduler = DelayedScheduler::default(); let controller = FlatStorageResharderController::new(); let flat_store = resharder.inner.runtime.store().flat_store(); + let resharding_event_type = ReshardingEventType::from_shard_layout( + &new_shard_layout, + chain.head().unwrap().last_block_hash, + chain.head().unwrap().prev_block_hash, + ) + .unwrap() + .unwrap(); assert!(resharder - .start_resharding_from_new_shard_layout(&new_shard_layout, &scheduler, controller) + .start_resharding(resharding_event_type, &new_shard_layout, &scheduler, controller) .is_ok()); let resharding_event = resharder.resharding_event(); @@ -818,14 +724,21 @@ mod tests { #[test] fn resume_split_starts_from_clean_state() { init_test_logger(); - let (_, resharder) = create_fs_resharder(simple_shard_layout()); + let (chain, resharder) = create_fs_resharder(simple_shard_layout()); let flat_store = resharder.inner.runtime.store().flat_store(); - let shard_layout = shard_layout_after_split(); - let resharding_type = event_type_from_shard_layout(&shard_layout).unwrap(); - let ReshardingSplitParams { parent_shard, left_child_shard, right_child_shard } = - match resharding_type { - ReshardingEventType::Split(params) => params, - }; + let new_shard_layout = shard_layout_after_split(); + let resharding_event_type = ReshardingEventType::from_shard_layout( + &new_shard_layout, + chain.head().unwrap().last_block_hash, + chain.head().unwrap().prev_block_hash, + ) + .unwrap() + .unwrap(); + let ReshardingSplitShardParams { + parent_shard, left_child_shard, right_child_shard, .. + } = match resharding_event_type { + ReshardingEventType::SplitShard(params) => params, + }; let mut store_update = flat_store.store_update(); @@ -839,10 +752,12 @@ mod tests { // Set parent state to ShardSplitting, manually, to simulate a forcibly interrupted resharding attempt. let resharding_status = FlatStorageReshardingStatus::SplittingParent(SplittingParentStatus { + // Values don't matter. left_child_shard, right_child_shard, - shard_layout, - // Values don't matter. 
+ shard_layout: new_shard_layout, + block_hash: CryptoHash::default(), + prev_block_hash: CryptoHash::default(), flat_head: BlockInfo { hash: CryptoHash::default(), height: 1, @@ -887,9 +802,17 @@ mod tests { let new_shard_layout = shard_layout_after_split(); let scheduler = TestScheduler {}; let controller = FlatStorageResharderController::new(); + let resharding_event_type = ReshardingEventType::from_shard_layout( + &new_shard_layout, + chain.head().unwrap().last_block_hash, + chain.head().unwrap().prev_block_hash, + ) + .unwrap() + .unwrap(); assert!(resharder - .start_resharding_from_new_shard_layout( + .start_resharding( + resharding_event_type, &new_shard_layout, &scheduler, controller.clone() @@ -942,13 +865,21 @@ mod tests { fn interrupt_split_shard() { init_test_logger(); // Perform resharding. - let (_, resharder) = create_fs_resharder(simple_shard_layout()); + let (chain, resharder) = create_fs_resharder(simple_shard_layout()); let new_shard_layout = shard_layout_after_split(); let scheduler = DelayedScheduler::default(); let controller = FlatStorageResharderController::new(); + let resharding_event_type = ReshardingEventType::from_shard_layout( + &new_shard_layout, + chain.head().unwrap().last_block_hash, + chain.head().unwrap().prev_block_hash, + ) + .unwrap() + .unwrap(); assert!(resharder - .start_resharding_from_new_shard_layout( + .start_resharding( + resharding_event_type, &new_shard_layout, &scheduler, controller.clone() @@ -985,10 +916,17 @@ mod tests { /// A shard can't be split if it isn't in ready state. #[test] fn reject_split_shard_if_parent_is_not_ready() { - let (_, resharder) = create_fs_resharder(simple_shard_layout()); + let (chain, resharder) = create_fs_resharder(simple_shard_layout()); let new_shard_layout = shard_layout_after_split(); let scheduler = TestScheduler {}; let controller = FlatStorageResharderController::new(); + let resharding_event_type = ReshardingEventType::from_shard_layout( + &new_shard_layout, + chain.head().unwrap().last_block_hash, + chain.head().unwrap().prev_block_hash, + ) + .unwrap() + .unwrap(); // Make flat storage of parent shard not ready. let parent_shard = ShardUId { version: 3, shard_id: 1 }; @@ -999,7 +937,14 @@ mod tests { // Trigger resharding and it should fail. assert!(resharder - .start_resharding_from_new_shard_layout(&new_shard_layout, &scheduler, controller) + .start_resharding(resharding_event_type, &new_shard_layout, &scheduler, controller) .is_err()); } + + /// Verify that a shard can be split correctly even if its flat head is lagging behind the expected + /// block height. + #[test] + fn split_shard_parent_flat_store_lagging_behind() { + // TODO(Trisfald): implement + } } diff --git a/chain/chain/src/resharding/event_type.rs b/chain/chain/src/resharding/event_type.rs new file mode 100644 index 00000000000..68b756c1d8d --- /dev/null +++ b/chain/chain/src/resharding/event_type.rs @@ -0,0 +1,184 @@ +//! Collection of all resharding V3 event types. + +use near_chain_primitives::Error; +use near_primitives::hash::CryptoHash; +use near_primitives::shard_layout::ShardLayout; +use near_primitives::types::AccountId; +use near_store::ShardUId; +use tracing::error; + +/// Struct used to destructure a new shard layout definition into the resulting resharding event. +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq, Eq))] +pub enum ReshardingEventType { + /// Split of a shard. 
+    SplitShard(ReshardingSplitShardParams),
+}
+
+#[derive(Debug, Clone)]
+#[cfg_attr(test, derive(PartialEq, Eq))]
+pub struct ReshardingSplitShardParams {
+    // Shard being split.
+    pub parent_shard: ShardUId,
+    // Child to the left of the account boundary.
+    pub left_child_shard: ShardUId,
+    // Child to the right of the account boundary.
+    pub right_child_shard: ShardUId,
+    /// The account at the boundary between the two children.
+    pub boundary_account: AccountId,
+    /// Hash of the first block having the new shard layout.
+    pub block_hash: CryptoHash,
+    /// The block before `block_hash`.
+    pub prev_block_hash: CryptoHash,
+}
+
+impl ReshardingEventType {
+    /// Takes as input a [ShardLayout] definition and deduces which kind of resharding operation must be
+    /// performed.
+    ///
+    /// # Args:
+    /// * `shard_layout`: the new shard layout
+    /// * `block_hash`: hash of the first block with `shard_layout`
+    /// * `prev_block_hash`: hash of the block preceding `block_hash`
+    ///
+    /// Returns a [ReshardingEventType] if exactly one resharding change is contained in `shard_layout`, otherwise returns `None`.
+    pub fn from_shard_layout(
+        shard_layout: &ShardLayout,
+        block_hash: CryptoHash,
+        prev_block_hash: CryptoHash,
+    ) -> Result<Option<ReshardingEventType>, Error> {
+        let log_and_error = |err_msg: &str| {
+            error!(target: "resharding", ?shard_layout, err_msg);
+            Err(Error::ReshardingError(err_msg.to_owned()))
+        };
+
+        // Resharding V3 supports shard layout V2 onwards.
+        let (shards_split_map, boundary_accounts) = match shard_layout {
+            ShardLayout::V0(_) | ShardLayout::V1(_) => {
+                return log_and_error("unsupported shard layout!");
+            }
+            ShardLayout::V2(layout) => {
+                let Some(shards_split_map) = layout.shards_split_map() else {
+                    return log_and_error("ShardLayoutV2 must have a shards_split_map!");
+                };
+                (shards_split_map, layout.boundary_accounts())
+            }
+        };
+
+        let mut event = None;
+
+        // Look for a shard having exactly two children, to detect a split.
+        for (parent_id, children_ids) in shards_split_map {
+            match children_ids.len() {
+                1 => {}
+                2 => {
+                    if event.is_some() {
+                        return log_and_error("can't perform two reshardings at the same time!");
+                    }
+                    // Parent shard is no longer part of this shard layout.
+                    let parent_shard =
+                        ShardUId { version: shard_layout.version(), shard_id: *parent_id as u32 };
+                    let left_child_shard =
+                        ShardUId::from_shard_id_and_layout(children_ids[0], shard_layout);
+                    let right_child_shard =
+                        ShardUId::from_shard_id_and_layout(children_ids[1], shard_layout);
+                    // Find the boundary account between the two children.
+                    let Some(boundary_account_index) =
+                        shard_layout.shard_ids().position(|id| id == left_child_shard.shard_id())
+                    else {
+                        return log_and_error(&format!(
+                            "shard {left_child_shard} not found in shard layout"
+                        ));
+                    };
+                    let boundary_account = boundary_accounts[boundary_account_index].clone();
+                    event = Some(ReshardingEventType::SplitShard(ReshardingSplitShardParams {
+                        parent_shard,
+                        left_child_shard,
+                        right_child_shard,
+                        boundary_account,
+                        block_hash,
+                        prev_block_hash,
+                    }));
+                }
+                _ => {
+                    return log_and_error(&format!(
+                        "invalid number of children for shard {parent_id}"
+                    ));
+                }
+            }
+        }
+
+        // At this point either a single resharding event was found or none at all.
+        Ok(event)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use near_primitives::shard_layout::ShardLayout;
+    use near_primitives::types::AccountId;
+    use near_store::ShardUId;
+    use std::collections::BTreeMap;
+
+    /// Shorthand to create account ID.
+    macro_rules! account {
+        ($str:expr) => {
+            $str.parse::<AccountId>().unwrap()
+        };
+    }
+
+    /// Verify that the correct type of resharding is deduced from a new shard layout.
+    #[test]
+    fn parse_event_type_from_shard_layout() {
+        let block = CryptoHash::hash_bytes(&[1]);
+        let prev_block = CryptoHash::hash_bytes(&[2]);
+
+        // Shard layouts V0 and V1 are rejected.
+        assert!(ReshardingEventType::from_shard_layout(
+            &ShardLayout::v0_single_shard(),
+            block,
+            prev_block
+        )
+        .is_err());
+        assert!(ReshardingEventType::from_shard_layout(&ShardLayout::v1_test(), block, prev_block)
+            .is_err());
+
+        // No resharding is ok.
+        let shards_split_map = BTreeMap::from([(0, vec![0])]);
+        let layout = ShardLayout::v2(vec![], vec![0], Some(shards_split_map));
+        assert!(ReshardingEventType::from_shard_layout(&layout, block, prev_block)
+            .is_ok_and(|event| event.is_none()));
+
+        // Single split shard is ok.
+        let shards_split_map = BTreeMap::from([(0, vec![0]), (1, vec![2, 3])]);
+        let layout = ShardLayout::v2(
+            vec![account!("ff"), account!("pp")],
+            vec![0, 2, 3],
+            Some(shards_split_map),
+        );
+
+        let event_type =
+            ReshardingEventType::from_shard_layout(&layout, block, prev_block).unwrap();
+        assert_eq!(
+            event_type,
+            Some(ReshardingEventType::SplitShard(ReshardingSplitShardParams {
+                parent_shard: ShardUId { version: 3, shard_id: 1 },
+                left_child_shard: ShardUId { version: 3, shard_id: 2 },
+                right_child_shard: ShardUId { version: 3, shard_id: 3 },
+                block_hash: block,
+                prev_block_hash: prev_block,
+                boundary_account: account!("pp")
+            }))
+        );
+
+        // Double split shard is not ok.
+        let shards_split_map = BTreeMap::from([(0, vec![2, 3]), (1, vec![4, 5])]);
+        let layout = ShardLayout::v2(
+            vec![account!("ff"), account!("pp"), account!("ss")],
+            vec![2, 3, 4, 5],
+            Some(shards_split_map),
+        );
+        assert!(ReshardingEventType::from_shard_layout(&layout, block, prev_block).is_err());
+    }
+}
diff --git a/chain/chain/src/resharding/manager.rs b/chain/chain/src/resharding/manager.rs
index c641aefd37d..b8c95e402c2 100644
--- a/chain/chain/src/resharding/manager.rs
+++ b/chain/chain/src/resharding/manager.rs
@@ -1,6 +1,6 @@
-use std::str::FromStr;
 use std::sync::Arc;

+use super::event_type::ReshardingEventType;
 use near_chain_configs::{MutableConfigValue, ReshardingConfig, ReshardingHandle};
 use near_chain_primitives::Error;
 use near_epoch_manager::EpochManagerAdapter;
@@ -10,7 +10,6 @@ use near_primitives::hash::CryptoHash;
 use near_primitives::shard_layout::get_block_shard_uid;
 use near_primitives::stateless_validation::stored_chunk_state_transition_data::StoredChunkStateTransitionData;
 use near_primitives::types::chunk_extra::ChunkExtra;
-use near_primitives::types::AccountId;
 use near_primitives::utils::get_block_shard_id;
 use near_store::adapter::StoreUpdateAdapter;
 use near_store::trie::mem::resharding::RetainMode;
@@ -54,15 +53,18 @@ impl ReshardingManager {
         let next_epoch_id = self.epoch_manager.get_next_epoch_id_from_prev_block(prev_hash)?;
         let next_shard_layout = self.epoch_manager.get_shard_layout(&next_epoch_id)?;
-        let children_shard_uids =
-            next_shard_layout.get_children_shards_uids(shard_uid.shard_id()).unwrap();
+        let resharding_event_type =
+            ReshardingEventType::from_shard_layout(&next_shard_layout, *block_hash, *prev_hash);

         // Hack to ensure this logic is not applied before ReshardingV3.
         // TODO(#12019): proper logic.
-        if next_shard_layout.version() < 3 || children_shard_uids.len() == 1 {
+        if next_shard_layout.version() < 3 {
             return Ok(());
         }
-        assert_eq!(children_shard_uids.len(), 2);
+        let Ok(Some(ReshardingEventType::SplitShard(split_shard_event))) = resharding_event_type
+        else {
+            return Ok(());
+        };

         let chunk_extra = self.get_chunk_extra(block_hash, &shard_uid)?;
         let Some(mem_tries) = tries.get_mem_tries(shard_uid) else {
@@ -77,13 +79,12 @@ impl ReshardingManager {
             return Err(Error::Other("Memtrie not loaded".to_string()));
         };

-        // TODO(#12019): take proper boundary account.
-        let boundary_account = AccountId::from_str("boundary.near").unwrap();
+        let boundary_account = split_shard_event.boundary_account;

         // TODO(#12019): leave only tracked shards.
         for (new_shard_uid, retain_mode) in [
-            (children_shard_uids[0], RetainMode::Left),
-            (children_shard_uids[1], RetainMode::Right),
+            (split_shard_event.left_child_shard, RetainMode::Left),
+            (split_shard_event.right_child_shard, RetainMode::Right),
         ] {
             let mut mem_tries = mem_tries.write().unwrap();
             let mem_trie_update = mem_tries.update(*chunk_extra.state_root(), true)?;
diff --git a/chain/chain/src/resharding/mod.rs b/chain/chain/src/resharding/mod.rs
index 8a316f46fdd..f3e8410acc6 100644
--- a/chain/chain/src/resharding/mod.rs
+++ b/chain/chain/src/resharding/mod.rs
@@ -1,3 +1,4 @@
+pub mod event_type;
 pub mod manager;
 pub mod resharding_v2;

diff --git a/core/primitives/src/shard_layout.rs b/core/primitives/src/shard_layout.rs
index fe3d530971b..f2139e29ec1 100644
--- a/core/primitives/src/shard_layout.rs
+++ b/core/primitives/src/shard_layout.rs
@@ -204,6 +204,14 @@ impl ShardLayoutV2 {
         }
         self.shard_ids[shard_id_index]
     }
+
+    pub fn shards_split_map(&self) -> &Option<ShardsSplitMapV2> {
+        &self.shards_split_map
+    }
+
+    pub fn boundary_accounts(&self) -> &Vec<AccountId> {
+        &self.boundary_accounts
+    }
 }

 #[derive(Debug)]
diff --git a/core/store/src/flat/types.rs b/core/store/src/flat/types.rs
index 2c97aa62060..f306368a289 100644
--- a/core/store/src/flat/types.rs
+++ b/core/store/src/flat/types.rs
@@ -185,6 +185,10 @@ pub struct SplittingParentStatus {
     pub right_child_shard: ShardUId,
     /// The new shard layout.
     pub shard_layout: ShardLayout,
+    /// Hash of the first block having the new shard layout.
+    pub block_hash: CryptoHash,
+    /// The block before `block_hash`.
+    pub prev_block_hash: CryptoHash,
     /// Parent's flat head state when the split began.
     pub flat_head: BlockInfo,
 }

From 6d4188ce189984aca26550c74a8f130cbd705681 Mon Sep 17 00:00:00 2001
From: Trisfald
Date: Wed, 2 Oct 2024 18:58:14 +0200
Subject: [PATCH 29/36] update protocol schema

---
 tools/protocol-schema-check/res/protocol_schema.toml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/protocol-schema-check/res/protocol_schema.toml b/tools/protocol-schema-check/res/protocol_schema.toml
index 64c2690aa82..73de4e4b7c2 100644
--- a/tools/protocol-schema-check/res/protocol_schema.toml
+++ b/tools/protocol-schema-check/res/protocol_schema.toml
@@ -115,8 +115,8 @@ FlatStateDeltaMetadata = 3401366797
 FlatStateValue = 83834662
 FlatStorageCreationStatus = 3717607657
 FlatStorageReadyStatus = 677315221
-FlatStorageReshardingStatus = 2079135057
-FlatStorageStatus = 1510148841
+FlatStorageReshardingStatus = 1905062447
+FlatStorageStatus = 3573153526
 FunctionCallAction = 2405840012
 FunctionCallError = 3652274053
 FunctionCallPermission = 1517509673
@@ -211,7 +211,7 @@ SignedTransaction = 3898692301
 SlashState = 3264273950
 SlashedValidator = 2601657743
 SnapshotHostInfo = 278564957
-SplittingParentStatus = 4074912992
+SplittingParentStatus = 3566488936
 StakeAction = 2002027105
 StateChangeCause = 1569242014
 StateHeaderKey = 1385533899

From d6b5ab3caba6a0fef1abef52b09f8fc7292eb5cd Mon Sep 17 00:00:00 2001
From: Trisfald
Date: Thu, 3 Oct 2024 10:57:31 +0200
Subject: [PATCH 30/36] improve code clarity

---
 chain/chain/src/flat_storage_resharder.rs | 35 ++++++++++++-----------
 core/store/src/flat/types.rs              |  3 ++-
 2 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs
index 0fce50e73ab..47470447485 100644
--- a/chain/chain/src/flat_storage_resharder.rs
+++ b/chain/chain/src/flat_storage_resharder.rs
@@ -21,10 +21,11 @@ use near_primitives::trie_key::trie_key_parsers::{
     parse_account_id_from_received_data_key, parse_account_id_from_trie_key_with_separator,
 };
 use near_primitives::types::AccountId;
-use near_store::adapter::flat_store::FlatStoreUpdateAdapter;
+use near_store::adapter::flat_store::{FlatStoreAdapter, FlatStoreUpdateAdapter};
 use near_store::adapter::StoreAdapter;
 use near_store::flat::{
-    FlatStorageReadyStatus, FlatStorageReshardingStatus, FlatStorageStatus, SplittingParentStatus,
+    BlockInfo, FlatStorageReadyStatus, FlatStorageReshardingStatus, FlatStorageStatus,
+    SplittingParentStatus,
 };
 use near_store::{ShardUId, StorageError};

@@ -144,22 +145,10 @@ impl FlatStorageResharder {
         info!(target: "resharding", ?split_params, "initiating flat storage shard split");
         self.check_no_resharding_in_progress()?;

-        // Parent shard must be in ready state.
-        let store = self.inner.runtime.store().flat_store();
-        let flat_head = if let FlatStorageStatus::Ready(FlatStorageReadyStatus { flat_head }) =
-            store
-                .get_flat_storage_status(parent_shard)
-                .map_err(|err| Into::<StorageError>::into(err))?
-        {
-            flat_head
-        } else {
-            let err_msg = "flat storage parent shard is not ready!";
-            error!(target: "resharding", ?parent_shard, err_msg);
-            return Err(Error::ReshardingError(err_msg.to_owned()));
-        };
-
         // Change parent and children shards flat storage status.
+        let store = self.inner.runtime.store().flat_store();
         let mut store_update = store.store_update();
+        let flat_head = retrieve_shard_flat_head(parent_shard, &store)?;
         let status = SplittingParentStatus {
             left_child_shard,
             right_child_shard,
@@ -239,6 +228,20 @@ impl FlatStorageResharder {
     }
 }

+/// Retrieves the flat head of the given `shard`.
+/// The shard must be in [FlatStorageStatus::Ready] state, otherwise this method returns an error.
+fn retrieve_shard_flat_head(shard: ShardUId, store: &FlatStoreAdapter) -> Result<BlockInfo, Error> {
+    let status =
+        store.get_flat_storage_status(shard).map_err(|err| Into::<StorageError>::into(err))?;
+    if let FlatStorageStatus::Ready(FlatStorageReadyStatus { flat_head }) = status {
+        Ok(flat_head)
+    } else {
+        let err_msg = "flat storage shard status is not ready!";
+        error!(target: "resharding", ?shard, ?status, err_msg);
+        Err(Error::ReshardingError(err_msg.to_owned()))
+    }
+}
+
 /// Task to perform the actual split of a flat storage shard. This may be a long operation time-wise.
 ///
 /// Conceptually it simply copies each key-value pair from the parent shard to the correct child.
diff --git a/core/store/src/flat/types.rs b/core/store/src/flat/types.rs
index f306368a289..0dd2a5ff933 100644
--- a/core/store/src/flat/types.rs
+++ b/core/store/src/flat/types.rs
@@ -174,7 +174,8 @@ pub struct FetchingStateStatus {
     pub num_parts: u64,
 }

-/// Current step of resharding flat storage - splitting parent.
+/// Holds the state associated to [FlatStorageReshardingStatus::SplittingParent].
+/// This struct stores the necessary data to execute a shard split of a parent shard into two children.
 #[derive(
     BorshSerialize, BorshDeserialize, Clone, Debug, PartialEq, Eq, serde::Serialize, ProtocolSchema,
 )]

From c52b947aa0a114d326cd9359778d3628670f514d Mon Sep 17 00:00:00 2001
From: Trisfald
Date: Thu, 3 Oct 2024 11:39:02 +0200
Subject: [PATCH 31/36] remove FlatStorageResharderInner

---
 chain/chain/src/flat_storage_resharder.rs | 44 +++++++++--------------
 1 file changed, 17 insertions(+), 27 deletions(-)

diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs
index 47470447485..ca64c95cfda 100644
--- a/chain/chain/src/flat_storage_resharder.rs
+++ b/chain/chain/src/flat_storage_resharder.rs
@@ -47,13 +47,8 @@ use near_store::{ShardUId, StorageError};
 /// - Background processing: the bulk of resharding is done in a separate task, see [FlatStorageResharderScheduler]
 /// - Interruptible: a reshard operation can be interrupted through a [FlatStorageResharderController].
 /// - In the case of event `Split` the state of flat storage will go back to what it was previously.
-pub struct FlatStorageResharder {
-    inner: FlatStorageResharderInner,
-}
-
-/// Inner clonable object to make sharing internal state easier.
 #[derive(Clone)]
-struct FlatStorageResharderInner {
+pub struct FlatStorageResharder {
     runtime: Arc<dyn RuntimeAdapter>,
     resharding_event: Arc<Mutex<Option<FlatStorageReshardingEventStatus>>>,
 }
@@ -62,8 +57,7 @@ impl FlatStorageResharder {
     /// Creates a new `FlatStorageResharder`.
     pub fn new(runtime: Arc<dyn RuntimeAdapter>) -> Self {
         let resharding_event = Arc::new(Mutex::new(None));
-        let inner = FlatStorageResharderInner { runtime, resharding_event };
-        Self { inner }
+        Self { runtime, resharding_event }
     }

     /// Resumes a resharding event that was in progress.
@@ -146,7 +140,7 @@ impl FlatStorageResharder {
         self.check_no_resharding_in_progress()?;

         // Change parent and children shards flat storage status.
- let store = self.inner.runtime.store().flat_store(); + let store = self.runtime.store().flat_store(); let mut store_update = store.store_update(); let flat_head = retrieve_shard_flat_head(parent_shard, &store)?; let status = SplittingParentStatus { @@ -189,12 +183,12 @@ impl FlatStorageResharder { } fn set_resharding_event(&self, event: FlatStorageReshardingEventStatus) { - *self.inner.resharding_event.lock().unwrap() = Some(event); + *self.resharding_event.lock().unwrap() = Some(event); } /// Returns the current in-progress resharding event, if any. pub fn resharding_event(&self) -> Option { - self.inner.resharding_event.lock().unwrap().clone() + self.resharding_event.lock().unwrap().clone() } /// Schedules a task to split a shard. @@ -209,7 +203,7 @@ impl FlatStorageResharder { self.set_resharding_event(event); info!(target: "resharding", ?parent_shard, ?status,"scheduling flat storage shard split"); - let resharder = self.inner.clone(); + let resharder = self.clone(); let task = Box::new(move || split_shard_task(resharder, controller)); scheduler.schedule(task); } @@ -218,7 +212,7 @@ impl FlatStorageResharder { fn clean_children_shards(&self, status: &SplittingParentStatus) -> Result<(), Error> { let SplittingParentStatus { left_child_shard, right_child_shard, .. } = status; debug!(target: "resharding", ?left_child_shard, ?right_child_shard, "cleaning up children shards flat storage's content"); - let mut store_update = self.inner.runtime.store().flat_store().store_update(); + let mut store_update = self.runtime.store().flat_store().store_update(); for child in [left_child_shard, right_child_shard] { store_update.remove_all_deltas(*child); store_update.remove_all_values(*child); @@ -245,10 +239,7 @@ fn retrieve_shard_flat_head(shard: ShardUId, store: &FlatStoreAdapter) -> Result /// Task to perform the actual split of a flat storage shard. This may be a long operation time-wise. /// /// Conceptually it simply copies each key-value pair from the parent shard to the correct child. -fn split_shard_task( - resharder: FlatStorageResharderInner, - controller: FlatStorageResharderController, -) { +fn split_shard_task(resharder: FlatStorageResharder, controller: FlatStorageResharderController) { let task_status = split_shard_task_impl(resharder.clone(), controller.clone()); split_shard_task_postprocessing(resharder, task_status); info!(target: "resharding", ?task_status, "flat storage shard split task finished"); @@ -260,7 +251,7 @@ fn split_shard_task( /// Retrieve parent shard UIds and current resharding event status. /// Resharding event must be of type "Split". fn get_parent_shard_and_status( - resharder: &FlatStorageResharderInner, + resharder: &FlatStorageResharder, ) -> (ShardUId, SplittingParentStatus) { let event = resharder.resharding_event.lock().unwrap(); match event.as_ref() { @@ -275,7 +266,7 @@ fn get_parent_shard_and_status( /// /// Returns `true` if the routine completed successfully. fn split_shard_task_impl( - resharder: FlatStorageResharderInner, + resharder: FlatStorageResharder, controller: FlatStorageResharderController, ) -> FlatStorageReshardingTaskStatus { if controller.is_interrupted() { @@ -413,7 +404,7 @@ fn shard_split_handle_key_value( /// Performs post-processing of shard splitting after all key-values have been moved from parent to children. /// `success` indicates whether or not the previous phase was successful. 
fn split_shard_task_postprocessing( - resharder: FlatStorageResharderInner, + resharder: FlatStorageResharder, task_status: FlatStorageReshardingTaskStatus, ) { let (parent_shard, split_status) = get_parent_shard_and_status(&resharder); @@ -688,7 +679,7 @@ mod tests { let new_shard_layout = shard_layout_after_split(); let scheduler = DelayedScheduler::default(); let controller = FlatStorageResharderController::new(); - let flat_store = resharder.inner.runtime.store().flat_store(); + let flat_store = resharder.runtime.store().flat_store(); let resharding_event_type = ReshardingEventType::from_shard_layout( &new_shard_layout, chain.head().unwrap().last_block_hash, @@ -728,7 +719,7 @@ mod tests { fn resume_split_starts_from_clean_state() { init_test_logger(); let (chain, resharder) = create_fs_resharder(simple_shard_layout()); - let flat_store = resharder.inner.runtime.store().flat_store(); + let flat_store = resharder.runtime.store().flat_store(); let new_shard_layout = shard_layout_after_split(); let resharding_event_type = ReshardingEventType::from_shard_layout( &new_shard_layout, @@ -825,7 +816,7 @@ mod tests { // Check flat storages of children contain the correct accounts. let left_child = ShardUId { version: 3, shard_id: 2 }; let right_child = ShardUId { version: 3, shard_id: 3 }; - let flat_store = resharder.inner.runtime.store().flat_store(); + let flat_store = resharder.runtime.store().flat_store(); let account_mm_key = TrieKey::Account { account_id: account!("mm") }; let account_vv_key = TrieKey::Account { account_id: account!("vv") }; assert!(flat_store @@ -846,7 +837,6 @@ mod tests { assert_eq!(flat_store.get_flat_storage_status(parent), Ok(FlatStorageStatus::Empty)); assert_eq!(flat_store.iter(parent).count(), 0); assert!(resharder - .inner .runtime .get_flat_storage_manager() .get_flat_storage_for_shard(parent) @@ -888,7 +878,7 @@ mod tests { controller.clone() ) .is_ok()); - let (parent_shard, status) = get_parent_shard_and_status(&resharder.inner); + let (parent_shard, status) = get_parent_shard_and_status(&resharder); let SplittingParentStatus { left_child_shard, right_child_shard, flat_head, .. } = status; // Interrupt the task before it starts. @@ -898,7 +888,7 @@ mod tests { scheduler.call(); // Check that resharding was effectively interrupted. - let flat_store = resharder.inner.runtime.store().flat_store(); + let flat_store = resharder.runtime.store().flat_store(); assert_eq!( controller.completion_receiver.recv_timeout(Duration::from_secs(1)), Ok(FlatStorageReshardingTaskStatus::Interrupted) @@ -933,7 +923,7 @@ mod tests { // Make flat storage of parent shard not ready. 
let parent_shard = ShardUId { version: 3, shard_id: 1 }; - let flat_store = resharder.inner.runtime.store().flat_store(); + let flat_store = resharder.runtime.store().flat_store(); let mut store_update = flat_store.store_update(); store_update.set_flat_storage_status(parent_shard, FlatStorageStatus::Empty); store_update.commit().unwrap(); From 767012905418af99b650aa23e2787b75a3a83e11 Mon Sep 17 00:00:00 2001 From: Trisfald Date: Mon, 14 Oct 2024 11:15:26 +0200 Subject: [PATCH 32/36] formatting --- chain/chain/src/flat_storage_creator.rs | 7 +- chain/chain/src/flat_storage_resharder.rs | 80 ++++++++++++----------- 2 files changed, 45 insertions(+), 42 deletions(-) diff --git a/chain/chain/src/flat_storage_creator.rs b/chain/chain/src/flat_storage_creator.rs index f553c78cf91..1f190f44cad 100644 --- a/chain/chain/src/flat_storage_creator.rs +++ b/chain/chain/src/flat_storage_creator.rs @@ -389,9 +389,10 @@ impl FlatStorageShardCreator { FlatStorageStatus::Disabled => { panic!("initiated flat storage creation for shard {shard_id} while it is disabled"); } - // If the flat storage is undergoing resharding it means it was previously created successfully, - // but resharding itself hasn't been finished. - // This case is a no-op because the flat storage resharder has already been created in `create_flat_storage_for_current_epoch`. + // If the flat storage is undergoing resharding it means it was previously created + // successfully, but resharding itself hasn't been finished. This case is a no-op + // because the flat storage resharder has already been created in + // `create_flat_storage_for_current_epoch`. FlatStorageStatus::Resharding(_) => { return Ok(true); } diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index ca64c95cfda..510ffbdd593 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -29,24 +29,25 @@ use near_store::flat::{ }; use near_store::{ShardUId, StorageError}; -/// `FlatStorageResharder` takes care of updating flat storage when a resharding event -/// happens. +/// `FlatStorageResharder` takes care of updating flat storage when a resharding event happens. /// /// On an high level, the events supported are: /// - #### Shard splitting /// Parent shard must be split into two children. The entire operation freezes the flat storage -/// for the involved shards. -/// Children shards are created empty and the key-values of the parent will be copied into one of them, -/// in the background. +/// for the involved shards. Children shards are created empty and the key-values of the parent +/// will be copied into one of them, in the background. /// -/// After the copy is finished the children shard will have the correct state at some past block height. -/// It'll be necessary to perform catchup before the flat storage can be put again in Ready state. -/// The parent shard storage is not needed anymore and can be removed. +/// After the copy is finished the children shard will have the correct state at some past block +/// height. It'll be necessary to perform catchup before the flat storage can be put again in +/// Ready state. The parent shard storage is not needed anymore and can be removed. /// /// The resharder has also the following properties: -/// - Background processing: the bulk of resharding is done in a separate task, see [FlatStorageResharderScheduler] -/// - Interruptible: a reshard operation can be interrupted through a [FlatStorageResharderController]. 
-/// - In the case of event `Split` the state of flat storage will go back to what it was previously.
+/// - Background processing: the bulk of resharding is done in a separate task, see
+///   [FlatStorageResharderScheduler]
+/// - Interruptible: a reshard operation can be interrupted through a
+///   [FlatStorageResharderController].
+/// - In the case of event `Split` the state of flat storage will go back to what it was
+///   previously.
 #[derive(Clone)]
 pub struct FlatStorageResharder {
     runtime: Arc<dyn RuntimeAdapter>,
@@ -60,7 +61,30 @@ impl FlatStorageResharder {
         Self { runtime, resharding_event }
     }

-    /// Resumes a resharding event that was in progress.
+    /// Starts a resharding event.
+    ///
+    /// For now, only splitting a shard is supported.
+    ///
+    /// # Args:
+    /// * `event_type`: the type of resharding event
+    /// * `shard_layout`: the new shard layout
+    /// * `scheduler`: component used to schedule the background tasks
+    /// * `controller`: manages the execution of the background tasks
+    pub fn start_resharding(
+        &self,
+        event_type: ReshardingEventType,
+        shard_layout: &ShardLayout,
+        scheduler: &dyn FlatStorageResharderScheduler,
+        controller: FlatStorageResharderController,
+    ) -> Result<(), Error> {
+        match event_type {
+            ReshardingEventType::SplitShard(params) => {
+                self.split_shard(params, shard_layout, scheduler, controller)
+            }
+        }
+    }
+
+    /// Resumes a resharding event that was interrupted.
     ///
     /// # Args:
     /// * `shard_uid`: UId of the shard
@@ -97,29 +121,6 @@ impl FlatStorageResharder {
         Ok(())
     }

-    /// Starts a resharding event.
-    ///
-    /// For now, only splitting a shard is supported.
-    ///
-    /// # Args:
-    /// * `event_type`: the type of resharding event
-    /// * `shard_layout`: the new shard layout
-    /// * `scheduler`: component used to schedule the background tasks
-    /// * `controller`: manages the execution of the background tasks
-    pub fn start_resharding(
-        &self,
-        event_type: ReshardingEventType,
-        shard_layout: &ShardLayout,
-        scheduler: &dyn FlatStorageResharderScheduler,
-        controller: FlatStorageResharderController,
-    ) -> Result<(), Error> {
-        match event_type {
-            ReshardingEventType::SplitShard(params) => {
-                self.split_shard(params, shard_layout, scheduler, controller)
-            }
-        }
-    }
-
     /// Starts the event of splitting a parent shard flat storage into two children.
     fn split_shard(
@@ -280,7 +281,9 @@ fn split_shard_task_impl(
     let (parent_shard, status) = get_parent_shard_and_status(&resharder);
     info!(target: "resharding", ?parent_shard, ?status, "flat storage shard split task: starting key-values copy");

-    // Parent shard flat storage head must be on block height just before the new shard layout kicks in.
-    // This guarantees that all deltas have been applied and thus the state of all key-values is up to date.
+    // Parent shard flat storage head must be on block height just before the new shard layout kicks
+    // in. This guarantees that all deltas have been applied and thus the state of all key-values is
+    // up to date.
     // TODO(trisfald): do this check, maybe call update_flat_storage_for_shard
     let _parent_flat_head = status.flat_head;

@@ -401,8 +403,8 @@ fn shard_split_handle_key_value(
     Ok(())
 }

-/// Performs post-processing of shard splitting after all key-values have been moved from parent to children.
-/// `success` indicates whether or not the previous phase was successful.
+/// Performs post-processing of shard splitting after all key-values have been moved from parent to
+/// children. `success` indicates whether or not the previous phase was successful.
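+/// A sketch of the outcomes handled below (summarized from this function's match arms;
+/// the children's catchup step is still a TODO at this point in the series):
+/// ```ignore
+/// match task_status {
+///     FlatStorageReshardingTaskStatus::Successful => {
+///         // children keep their copied key-values and await catchup; the parent's
+///         // flat storage is removed entirely
+///     }
+///     FlatStorageReshardingTaskStatus::Failed | FlatStorageReshardingTaskStatus::Interrupted => {
+///         // the parent is restored to Ready(flat_head) and the children are wiped
+///     }
+/// }
+/// ```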
fn split_shard_task_postprocessing( resharder: FlatStorageResharder, task_status: FlatStorageReshardingTaskStatus, From a3849a243974c1b53cfcae93a4d8bd2080bf417c Mon Sep 17 00:00:00 2001 From: Trisfald Date: Mon, 14 Oct 2024 11:24:04 +0200 Subject: [PATCH 33/36] code review improvements --- chain/chain/src/flat_storage_resharder.rs | 18 ++++++++++-------- core/primitives/src/shard_layout.rs | 3 +++ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index 510ffbdd593..fe9bf0f5e7b 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -44,7 +44,7 @@ use near_store::{ShardUId, StorageError}; /// The resharder has also the following properties: /// - Background processing: the bulk of resharding is done in a separate task, see /// [FlatStorageResharderScheduler] -/// - Interruptible: a reshard operation can be interrupted through a +/// - Interruptible: a reshard operation can be cancelled through a /// [FlatStorageResharderController]. /// - In the case of event `Split` the state of flat storage will go back to what it was /// previously. @@ -86,6 +86,8 @@ impl FlatStorageResharder { /// Resumes a resharding event that was interrupted. /// + /// Flat-storage resharding will resume upon a node crash. + /// /// # Args: /// * `shard_uid`: UId of the shard /// * `status`: resharding status of the shard @@ -271,7 +273,7 @@ fn split_shard_task_impl( controller: FlatStorageResharderController, ) -> FlatStorageReshardingTaskStatus { if controller.is_interrupted() { - return FlatStorageReshardingTaskStatus::Interrupted; + return FlatStorageReshardingTaskStatus::Cancelled; } /// Determines after how many key-values the process stops to @@ -329,7 +331,7 @@ fn split_shard_task_impl( break; } if controller.is_interrupted() { - return FlatStorageReshardingTaskStatus::Interrupted; + return FlatStorageReshardingTaskStatus::Cancelled; } } FlatStorageReshardingTaskStatus::Successful @@ -436,7 +438,7 @@ fn split_shard_task_postprocessing( } // TODO(trisfald): trigger catchup } - FlatStorageReshardingTaskStatus::Failed | FlatStorageReshardingTaskStatus::Interrupted => { + FlatStorageReshardingTaskStatus::Failed | FlatStorageReshardingTaskStatus::Cancelled => { // We got an error or an interrupt request. // Reset parent. store_update.set_flat_storage_status( @@ -492,7 +494,7 @@ pub enum FlatStorageReshardingEventStatus { pub enum FlatStorageReshardingTaskStatus { Successful, Failed, - Interrupted, + Cancelled, } /// Helps control the flat storage resharder operation. More specifically, @@ -745,7 +747,7 @@ mod tests { store_update.set(child_shard, dirty_key.clone(), dirty_value.clone()); } - // Set parent state to ShardSplitting, manually, to simulate a forcibly interrupted resharding attempt. + // Set parent state to ShardSplitting, manually, to simulate a forcibly cancelled resharding attempt. let resharding_status = FlatStorageReshardingStatus::SplittingParent(SplittingParentStatus { // Values don't matter. @@ -889,11 +891,11 @@ mod tests { // Run the task. scheduler.call(); - // Check that resharding was effectively interrupted. + // Check that resharding was effectively cancelled. 
         let flat_store = resharder.runtime.store().flat_store();
         assert_eq!(
             controller.completion_receiver.recv_timeout(Duration::from_secs(1)),
-            Ok(FlatStorageReshardingTaskStatus::Interrupted)
+            Ok(FlatStorageReshardingTaskStatus::Cancelled)
         );
         assert_eq!(
             flat_store.get_flat_storage_status(parent_shard),
diff --git a/core/primitives/src/shard_layout.rs b/core/primitives/src/shard_layout.rs
index 5217bf66447..022617bc35a 100644
--- a/core/primitives/src/shard_layout.rs
+++ b/core/primitives/src/shard_layout.rs
@@ -82,6 +82,7 @@ pub enum ShardLayout {
     Debug,
     PartialEq,
     Eq,
+    ProtocolSchema,
 )]
 pub struct ShardLayoutV0 {
     /// Map accounts evenly across all shards
@@ -130,6 +131,7 @@ fn new_shards_split_map_v2(shards_split_map: BTreeMap<ShardId, Vec<ShardId>>) -> ShardsSplitMapV2 {
     Debug,
     PartialEq,
     Eq,
+    ProtocolSchema,
 )]
 pub struct ShardLayoutV1 {
     /// The boundary accounts are the accounts on boundaries between shards.
@@ -173,6 +175,7 @@ impl ShardLayoutV1 {
     Debug,
     PartialEq,
     Eq,
+    ProtocolSchema,
 )]
 pub struct ShardLayoutV2 {
     /// The boundary accounts are the accounts on boundaries between shards.

From 17538f9910764707065593a8711ff8dbe37f2411 Mon Sep 17 00:00:00 2001
From: Trisfald
Date: Mon, 14 Oct 2024 11:29:25 +0200
Subject: [PATCH 34/36] propagate error

---
 chain/chain/src/resharding/manager.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/chain/chain/src/resharding/manager.rs b/chain/chain/src/resharding/manager.rs
index 5a6fcdb6634..bf8e4267ae5 100644
--- a/chain/chain/src/resharding/manager.rs
+++ b/chain/chain/src/resharding/manager.rs
@@ -57,15 +57,15 @@ impl ReshardingManager {
         let next_epoch_id = self.epoch_manager.get_next_epoch_id_from_prev_block(prev_hash)?;
         let next_shard_layout = self.epoch_manager.get_shard_layout(&next_epoch_id)?;
-        let resharding_event_type =
-            ReshardingEventType::from_shard_layout(&next_shard_layout, *block_hash, *prev_hash);

         // Hack to ensure this logic is not applied before ReshardingV3.
         // TODO(#12019): proper logic.
         if next_shard_layout.version() < 3 {
             return Ok(());
         }
-        let Ok(Some(ReshardingEventType::SplitShard(split_shard_event))) = resharding_event_type
-        else {
+
+        let resharding_event_type =
+            ReshardingEventType::from_shard_layout(&next_shard_layout, *block_hash, *prev_hash)?;
+        let Some(ReshardingEventType::SplitShard(split_shard_event)) = resharding_event_type else {
             return Ok(());
         };

From ebd144a951054085bae2e83845838c0ab0f6b313 Mon Sep 17 00:00:00 2001
From: Trisfald
Date: Mon, 14 Oct 2024 11:48:35 +0200
Subject: [PATCH 35/36] make get_parent_shard_and_status a resharder function

---
 chain/chain/src/flat_storage_resharder.rs | 37 ++++++++++++-----------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs
index fe9bf0f5e7b..54374e7e95b 100644
--- a/chain/chain/src/flat_storage_resharder.rs
+++ b/chain/chain/src/flat_storage_resharder.rs
@@ -223,6 +223,18 @@ impl FlatStorageResharder {
         store_update.commit()?;
         Ok(())
     }
+
+    /// Retrieves the parent shard UId and current resharding event status, only if a resharding event
+    /// is in progress and of type `Split`.
+    fn get_parent_shard_and_status(&self) -> Option<(ShardUId, SplittingParentStatus)> {
+        let event = self.resharding_event.lock().unwrap();
+        match event.as_ref() {
+            Some(FlatStorageReshardingEventStatus::SplitShard(parent_shard, status)) => {
+                Some((*parent_shard, status.clone()))
+            }
+            None => None,
+        }
+    }
 }

 /// Retrieves the flat head of the given `shard`.
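With the accessor now returning an `Option`, observers can inspect progress without the
panic baked into the old free function. A minimal usage sketch (assuming a `resharder`
instance as in the tests):

    if let Some((parent_shard, status)) = resharder.get_parent_shard_and_status() {
        info!(target: "resharding", ?parent_shard, flat_head = ?status.flat_head, "split in progress");
    }

The background task itself keeps the old hard expectation, as the next hunk shows with
its `expect()` calls.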
@@ -251,20 +263,6 @@ fn split_shard_task(resharder: FlatStorageResharder, controller: FlatStorageResharderController) {
     };
 }

-/// Retrieve parent shard UIds and current resharding event status.
-/// Resharding event must be of type "Split".
-fn get_parent_shard_and_status(
-    resharder: &FlatStorageResharder,
-) -> (ShardUId, SplittingParentStatus) {
-    let event = resharder.resharding_event.lock().unwrap();
-    match event.as_ref() {
-        Some(FlatStorageReshardingEventStatus::SplitShard(parent_shard, status)) => {
-            (*parent_shard, status.clone())
-        }
-        None => panic!("a resharding event must exist!"),
-    }
-}
-
 /// Performs the bulk of [split_shard_task].
 ///
 /// Returns `true` if the routine completed successfully.
@@ -280,7 +278,9 @@ fn split_shard_task_impl(
     /// commit changes and to check interruptions.
     const BATCH_SIZE: usize = 10_000;

-    let (parent_shard, status) = get_parent_shard_and_status(&resharder);
+    let (parent_shard, status) = resharder
+        .get_parent_shard_and_status()
+        .expect("flat storage resharding event must be Split!");
     info!(target: "resharding", ?parent_shard, ?status, "flat storage shard split task: starting key-values copy");

     // Parent shard flat storage head must be on block height just before the new shard layout kicks
@@ -411,7 +411,9 @@ fn split_shard_task_postprocessing(
     resharder: FlatStorageResharder,
     task_status: FlatStorageReshardingTaskStatus,
 ) {
-    let (parent_shard, split_status) = get_parent_shard_and_status(&resharder);
+    let (parent_shard, split_status) = resharder
+        .get_parent_shard_and_status()
+        .expect("flat storage resharding event must be Split!");
     let SplittingParentStatus { left_child_shard, right_child_shard, flat_head, .. } = split_status;
     let flat_store = resharder.runtime.store().flat_store();
     info!(target: "resharding", ?parent_shard, ?task_status, ?split_status, "flat storage shard split task: post-processing");
@@ -504,7 +506,6 @@ pub struct FlatStorageResharderController {
     /// Resharding handle to control interruption.
     handle: ReshardingHandle,
     /// This object will be used to signal when the background task is completed.
-    /// A value of `true` means that the operation completed successfully.
     completion_sender: Sender<FlatStorageReshardingTaskStatus>,
     /// Corresponding receiver for `completion_sender`.
     pub completion_receiver: Receiver<FlatStorageReshardingTaskStatus>,
@@ -882,7 +883,7 @@ mod tests {
                 controller.clone()
             )
             .is_ok());
-        let (parent_shard, status) = get_parent_shard_and_status(&resharder);
+        let (parent_shard, status) = resharder.get_parent_shard_and_status().unwrap();
         let SplittingParentStatus { left_child_shard, right_child_shard, flat_head, .. } = status;

         // Interrupt the task before it starts.
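For reference, the cancellation flow exercised by the test above couples the
`ReshardingHandle` (the flag polled by `is_interrupted()`) with the completion channel.
A hedged sketch of the caller side (assumption: `ReshardingHandle` exposes `stop()`,
as it does for resharding V2; the timeout value is arbitrary):

    controller.handle.stop(); // flip the cancellation flag before the task runs
    scheduler.call();         // the DelayedScheduler now executes the queued split task
    let status = controller
        .completion_receiver
        .recv_timeout(Duration::from_secs(1))
        .expect("split task should report a final status");
    assert_eq!(status, FlatStorageReshardingTaskStatus::Cancelled);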
From 4468660199c9a67bd9f592b53acc72d52b91067a Mon Sep 17 00:00:00 2001 From: Trisfald Date: Mon, 14 Oct 2024 12:20:52 +0200 Subject: [PATCH 36/36] update protocol schema --- tools/protocol-schema-check/res/protocol_schema.toml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/protocol-schema-check/res/protocol_schema.toml b/tools/protocol-schema-check/res/protocol_schema.toml index 73de4e4b7c2..3b31a183e0a 100644 --- a/tools/protocol-schema-check/res/protocol_schema.toml +++ b/tools/protocol-schema-check/res/protocol_schema.toml @@ -115,8 +115,8 @@ FlatStateDeltaMetadata = 3401366797 FlatStateValue = 83834662 FlatStorageCreationStatus = 3717607657 FlatStorageReadyStatus = 677315221 -FlatStorageReshardingStatus = 1905062447 -FlatStorageStatus = 3573153526 +FlatStorageReshardingStatus = 4155800626 +FlatStorageStatus = 2745297627 FunctionCallAction = 2405840012 FunctionCallError = 3652274053 FunctionCallPermission = 1517509673 @@ -196,7 +196,10 @@ ShardChunkHeaderV2 = 3706194757 ShardChunkHeaderV3 = 2763275079 ShardChunkV1 = 1814805625 ShardChunkV2 = 1857597167 -ShardLayout = 3421343543 +ShardLayout = 2672297879 +ShardLayoutV0 = 3139625127 +ShardLayoutV1 = 198917829 +ShardLayoutV2 = 1739189967 ShardProof = 2773021473 ShardStateSyncResponse = 2185281594 ShardStateSyncResponseHeaderV1 = 2708725662 @@ -211,7 +214,7 @@ SignedTransaction = 3898692301 SlashState = 3264273950 SlashedValidator = 2601657743 SnapshotHostInfo = 278564957 -SplittingParentStatus = 3566488936 +SplittingParentStatus = 3614986382 StakeAction = 2002027105 StateChangeCause = 1569242014 StateHeaderKey = 1385533899
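A closing note on the hashes touched by this final patch: protocol_schema.toml pins one
stable hash per `ProtocolSchema`-derived type, so any change to a borsh-serialized
structure (such as the new fields on `SplittingParentStatus`, or the `ProtocolSchema`
derives added to the shard layout structs) surfaces as a mismatch until the file is
deliberately regenerated. Conceptually the check amounts to something like the sketch
below (the helper and the map are illustrative, not the tool's actual API):

    use std::collections::HashMap;

    /// Compare a freshly computed layout hash against the committed TOML entry.
    fn check_schema(name: &str, computed: u32, committed: &HashMap<String, u32>) -> Result<(), String> {
        match committed.get(name) {
            Some(&expected) if expected == computed => Ok(()),
            Some(&expected) => Err(format!(
                "{name}: schema hash changed ({expected} -> {computed}); update protocol_schema.toml if intentional"
            )),
            None => Err(format!("{name}: missing entry in protocol_schema.toml")),
        }
    }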