Skip to content

Commit

Permalink
fix(congestion_control) - fix congestion info bootstrapping trigger (#…
Browse files Browse the repository at this point in the history
…11500)

The congestion control bootstrapping is currently not triggered at all.
This PR fixes the issue. The root cause is that the congestion control
was set to default at the first chunk of the epoch where congestion
control is enabled. This wasn't an issue originally because of another
bug that was fixed since. The fixed bug was to use congestion info of
the previous block.

Now that we correctly use the most recent congestion information we need
to handle the protocol upgrade some other way. I considered a few
different options:
1) Make the congestion information in the chunk header an Option and
have it set to None in the first chunk where congestion control is
enabled. This is annoying because this Option is only needed once but
will be stuck in the header forever.
2) Use the old (V2) chunk header inner in the first chunk where
congestion control is enabled. This is annoying because we need to allow
an older chunk header in the blocks where typically only current chunk
header would be expected.
3) Set the congestion info to default in the first chunk where
congestion control is enabled. This is annoying because this info would
be incorrect and because we would need some other way of correctly
triggering bootstrapping.
4) Adding the congestion info to the old chunk header - that is
incorrect, we should never modify existing headers.
5) Have some intermediate header and multiple protocol version upgrades
- this is a bit too involved.

I only really considered options 1 and 2 as reasonable choices. I picked
option 2 for the solution as it's more future friendly - we are only
inconvenienced once and in the future congestion control will be
non-optional in the header.
  • Loading branch information
wacban authored Jun 7, 2024
1 parent 9a6820b commit 6038222
Show file tree
Hide file tree
Showing 16 changed files with 157 additions and 85 deletions.
5 changes: 0 additions & 5 deletions chain/chain/src/stateless_validation/chunk_validation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -488,17 +488,12 @@ pub fn validate_chunk_state_witness(
}
}

let parent_hash = state_witness.chunk_header.prev_block_hash();
let header_epoch_id = epoch_manager.get_epoch_id_from_prev_block(parent_hash)?;
let header_protocol_version = epoch_manager.get_epoch_protocol_version(&header_epoch_id)?;

// Finally, verify that the newly proposed chunk matches everything we have computed.
let (outgoing_receipts_root, _) = merklize(&outgoing_receipts_hashes);
validate_chunk_with_chunk_extra_and_receipts_root(
&chunk_extra,
&state_witness.chunk_header,
&outgoing_receipts_root,
header_protocol_version,
)?;

Ok(())
Expand Down
63 changes: 13 additions & 50 deletions chain/chain/src/validate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ use near_primitives::sharding::{ShardChunk, ShardChunkHeader};
use near_primitives::transaction::SignedTransaction;
use near_primitives::types::chunk_extra::ChunkExtra;
use near_primitives::types::{AccountId, BlockHeight, EpochId, Nonce};
use near_primitives::version::{ProtocolFeature, ProtocolVersion};

use crate::types::RuntimeAdapter;
use crate::{byzantine_assert, Chain};
Expand Down Expand Up @@ -126,14 +125,10 @@ pub fn validate_chunk_with_chunk_extra(
};
let (outgoing_receipts_root, _) = merklize(&outgoing_receipts_hashes);

let header_epoch_id = epoch_manager.get_epoch_id_from_prev_block(prev_block_hash)?;
let header_protocol_version = epoch_manager.get_epoch_protocol_version(&header_epoch_id)?;

validate_chunk_with_chunk_extra_and_receipts_root(
prev_chunk_extra,
chunk_header,
&outgoing_receipts_root,
header_protocol_version,
)
}

Expand All @@ -142,7 +137,6 @@ pub fn validate_chunk_with_chunk_extra_and_receipts_root(
prev_chunk_extra: &ChunkExtra,
chunk_header: &ShardChunkHeader,
outgoing_receipts_root: &CryptoHash,
header_protocol_version: ProtocolVersion,
) -> Result<(), Error> {
if *prev_chunk_extra.state_root() != chunk_header.prev_state_root() {
return Err(Error::InvalidStateRoot);
Expand Down Expand Up @@ -183,11 +177,7 @@ pub fn validate_chunk_with_chunk_extra_and_receipts_root(
return Err(Error::InvalidGasLimit);
}

validate_congestion_info(
&prev_chunk_extra.congestion_info(),
&chunk_header.congestion_info(),
header_protocol_version,
)?;
validate_congestion_info(&prev_chunk_extra.congestion_info(), &chunk_header.congestion_info())?;

Ok(())
}
Expand All @@ -199,52 +189,25 @@ pub fn validate_chunk_with_chunk_extra_and_receipts_root(
fn validate_congestion_info(
extra_congestion_info: &Option<CongestionInfo>,
header_congestion_info: &Option<CongestionInfo>,
header_protocol_version: ProtocolVersion,
) -> Result<(), Error> {
// The congestion info should be Some iff the congestion control features is enabled.
let enabled = ProtocolFeature::CongestionControl.enabled(header_protocol_version);
if header_congestion_info.is_some() != enabled {
return Err(Error::InvalidCongestionInfo(format!(
"Congestion Information version mismatch. version {}, enabled: {}, info {:?}",
header_protocol_version, enabled, header_congestion_info
)));
}

match (extra_congestion_info, header_congestion_info) {
// If both are none then there is no congestion info to validate.
(None, None) => Ok(()),
// If the congestion control is enabled in the previous chunk then it should
// also be enabled in the current chunk.
(Some(info), None) => Err(Error::InvalidCongestionInfo(format!(
"Congestion Information disappeared. {:?}.",
info
// It is invalid to have one None and one Some. The congestion info in
// header should always be derived from the congestion info in extra.
(None, Some(_)) | (Some(_), None) => Err(Error::InvalidCongestionInfo(format!(
"Congestion Information mismatch. extra: {:?}, header: {:?}",
extra_congestion_info, header_congestion_info
))),
// At the epoch boundary where congestion control was enabled the chunk
// extra does not have the congestion control enabled and the header does
// have it enabled. The chunk extra of the previous chunk does not have
// congestion info so the congestion info in the current chunk header should
// be set to the default one.
(None, Some(info)) => {
if info == &CongestionInfo::default() {
Ok(())
} else {
Err(Error::InvalidCongestionInfo(format!(
"Congestion Information invalid after protocol upgrade. {:?}",
info
)))
}
}
// Congestion Info is present in both the extra and the header. Validate it.
(Some(extra), Some(header)) => {
if !CongestionInfo::validate_extra_and_header(extra, header) {
Err(Error::InvalidCongestionInfo(format!(
"Congestion Information mismatch. extra: {:?}, header: {:?}",
(Some(extra), Some(header)) => CongestionInfo::validate_extra_and_header(extra, header)
.then_some(())
.ok_or_else(|| {
Error::InvalidCongestionInfo(format!(
"Congestion Information validate error. extra: {:?}, header: {:?}",
extra, header
)))
} else {
Ok(())
}
}
))
}),
}
}

Expand Down
2 changes: 1 addition & 1 deletion chain/chunks/src/shards_manager_actor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1976,7 +1976,7 @@ impl ShardsManagerActor {
prev_outgoing_receipts: &[Receipt],
prev_outgoing_receipts_root: CryptoHash,
tx_root: CryptoHash,
congestion_info: CongestionInfo,
congestion_info: Option<CongestionInfo>,
signer: &dyn ValidatorSigner,
rs: &ReedSolomon,
protocol_version: ProtocolVersion,
Expand Down
6 changes: 5 additions & 1 deletion chain/chunks/src/test_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use near_network::{
types::{NetworkRequests, PeerManagerMessageRequest},
};
use near_primitives::congestion_info::CongestionInfo;
use near_primitives::version::ProtocolFeature;
use near_primitives::{
hash::CryptoHash,
merkle::{self, MerklePath},
Expand Down Expand Up @@ -262,6 +263,9 @@ impl MockChainForShardsManager {
let data_parts = self.epoch_manager.num_data_parts();
let parity_parts = self.epoch_manager.num_total_parts() - data_parts;
let rs = ReedSolomon::new(data_parts, parity_parts).unwrap();
let congestion_info = ProtocolFeature::CongestionControl
.enabled(PROTOCOL_VERSION)
.then_some(CongestionInfo::default());
let (chunk, merkle_paths) = ShardsManagerActor::create_encoded_shard_chunk(
self.tip.last_block_hash,
CryptoHash::default(),
Expand All @@ -276,7 +280,7 @@ impl MockChainForShardsManager {
&receipts,
receipts_root,
MerkleHash::default(),
CongestionInfo::default(),
congestion_info,
&signer,
&rs,
PROTOCOL_VERSION,
Expand Down
7 changes: 5 additions & 2 deletions chain/chunks/src/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use near_primitives::sharding::{
use near_primitives::test_utils::create_test_signer;
use near_primitives::types::MerkleHash;
use near_primitives::types::{AccountId, EpochId, ShardId};
use near_primitives::version::PROTOCOL_VERSION;
use near_primitives::version::{ProtocolFeature, PROTOCOL_VERSION};
use near_store::test_utils::create_test_store;
use near_store::Store;
use reed_solomon_erasure::galois_8::ReedSolomon;
Expand Down Expand Up @@ -136,6 +136,9 @@ impl ChunkTestFixture {
let shard_layout = epoch_manager.get_shard_layout(&EpochId::default()).unwrap();
let receipts_hashes = Chain::build_receipts_hashes(&receipts, &shard_layout);
let (receipts_root, _) = merkle::merklize(&receipts_hashes);
let congestion_info = ProtocolFeature::CongestionControl
.enabled(PROTOCOL_VERSION)
.then_some(CongestionInfo::default());
let (mock_chunk, mock_merkle_paths) = ShardsManagerActor::create_encoded_shard_chunk(
mock_parent_hash,
Default::default(),
Expand All @@ -150,7 +153,7 @@ impl ChunkTestFixture {
&receipts,
receipts_root,
MerkleHash::default(),
CongestionInfo::default(),
congestion_info,
&signer,
&rs,
PROTOCOL_VERSION,
Expand Down
6 changes: 1 addition & 5 deletions chain/client/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -933,11 +933,7 @@ impl Client {
#[cfg(feature = "test_features")]
let gas_used = if self.produce_invalid_chunks { gas_used + 1 } else { gas_used };

// The congestion info is set to default if it is not present. If the
// congestion control feature is not enabled the congestion info will be
// stripped from the chunk header anyway. In the first chunk where
// feature is enabled the header will contain the default congestion info.
let congestion_info = chunk_extra.congestion_info().unwrap_or_default();
let congestion_info = chunk_extra.congestion_info();
let (encoded_chunk, merkle_paths) = ShardsManagerActor::create_encoded_shard_chunk(
prev_block_hash,
*chunk_extra.state_root(),
Expand Down
3 changes: 1 addition & 2 deletions chain/client/src/test_utils/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,7 @@ pub fn create_chunk(
transactions,
decoded_chunk.prev_outgoing_receipts(),
header.prev_outgoing_receipts_root(),
// TODO(congestion_control): compute if not available
header.congestion_info().unwrap_or_default(),
header.congestion_info(),
&*signer,
PROTOCOL_VERSION,
)
Expand Down
7 changes: 5 additions & 2 deletions chain/client/src/tests/process_blocks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ fn test_bad_shard_id() {
// modify chunk 0 to have shard_id 1
let chunk = chunks.get(0).unwrap();
let outgoing_receipts_root = chunks.get(1).unwrap().prev_outgoing_receipts_root();
let congestion_info = ProtocolFeature::CongestionControl
.enabled(PROTOCOL_VERSION)
.then_some(CongestionInfo::default());
let mut modified_chunk = ShardChunkHeaderV3::new(
PROTOCOL_VERSION,
*chunk.prev_block_hash(),
Expand All @@ -80,7 +83,7 @@ fn test_bad_shard_id() {
outgoing_receipts_root,
chunk.tx_root(),
chunk.prev_validator_proposals().collect(),
CongestionInfo::default(),
congestion_info,
&validator_signer,
);
modified_chunk.height_included = 2;
Expand Down Expand Up @@ -209,7 +212,7 @@ fn test_bad_congestion_info_impl(mode: BadCongestionInfoMode) {
chunk.prev_outgoing_receipts_root(),
chunk.tx_root(),
chunk.prev_validator_proposals().collect(),
congestion_info,
Some(congestion_info),
&validator_signer,
);
modified_chunk_header.height_included = 2;
Expand Down
7 changes: 5 additions & 2 deletions chain/epoch-manager/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use near_primitives::sharding::{ShardChunkHeader, ShardChunkHeaderV3};
use near_primitives::stateless_validation::PartialEncodedStateWitness;
use near_primitives::types::ValidatorKickoutReason::{NotEnoughBlocks, NotEnoughChunks};
use near_primitives::validator_signer::ValidatorSigner;
use near_primitives::version::ProtocolFeature::{SimpleNightshade, StatelessValidationV0};
use near_primitives::version::ProtocolFeature::{self, SimpleNightshade, StatelessValidationV0};
use near_primitives::version::PROTOCOL_VERSION;
use near_store::test_utils::create_test_store;
use num_rational::Ratio;
Expand Down Expand Up @@ -2730,6 +2730,9 @@ fn test_max_kickout_stake_ratio() {
}

fn test_chunk_header(h: &[CryptoHash], signer: &dyn ValidatorSigner) -> ShardChunkHeader {
let congestion_info = ProtocolFeature::CongestionControl
.enabled(PROTOCOL_VERSION)
.then_some(CongestionInfo::default());
ShardChunkHeader::V3(ShardChunkHeaderV3::new(
PROTOCOL_VERSION,
h[0],
Expand All @@ -2745,7 +2748,7 @@ fn test_chunk_header(h: &[CryptoHash], signer: &dyn ValidatorSigner) -> ShardChu
h[2],
h[2],
vec![],
CongestionInfo::default(),
congestion_info,
signer,
))
}
Expand Down
7 changes: 6 additions & 1 deletion core/primitives/src/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use crate::version::{ProtocolVersion, SHARD_CHUNK_HEADER_UPGRADE_VERSION};
use borsh::{BorshDeserialize, BorshSerialize};
use near_crypto::Signature;
use near_primitives_core::types::ShardId;
use near_primitives_core::version::ProtocolFeature;
use near_time::Utc;
use primitive_types::U256;
use reed_solomon_erasure::galois_8::ReedSolomon;
Expand Down Expand Up @@ -105,6 +106,10 @@ pub fn genesis_chunks(
std::iter::repeat(state_roots[0]).take(shard_ids.len()).collect()
};

let congestion_info = ProtocolFeature::CongestionControl
.enabled(genesis_protocol_version)
.then_some(CongestionInfo::default());

shard_ids
.into_iter()
.zip(state_roots)
Expand All @@ -124,7 +129,7 @@ pub fn genesis_chunks(
vec![],
&[],
CryptoHash::default(),
CongestionInfo::default(),
congestion_info,
&EmptyValidatorSigner::default(),
genesis_protocol_version,
)
Expand Down
8 changes: 6 additions & 2 deletions core/primitives/src/congestion_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,12 @@ impl CongestionInfo {
// information from the chunk extra.
//
// TODO(congestion_control) validate allowed shard correctly
// If the shard is fully congested the any of the other shards can be the allowed shard.
// If the shard is not fully congested the allowed shard should be set to self.
// * If the shard is fully congested then any of the other shards can be the
// allowed shard.
// * If the shard is not fully congested the allowed shard should be set to
// self.
// Currently the check is more restrictive and expects all nodes to follow
// the reference implementation which makes it part of the protocol.
pub fn validate_extra_and_header(extra: &CongestionInfo, header: &CongestionInfo) -> bool {
match (extra, header) {
(CongestionInfo::V1(extra), CongestionInfo::V1(header)) => {
Expand Down
13 changes: 9 additions & 4 deletions core/primitives/src/sharding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,11 @@ impl ShardChunkHeaderV3 {
prev_outgoing_receipts_root: CryptoHash,
tx_root: CryptoHash,
prev_validator_proposals: Vec<ValidatorStake>,
congestion_info: CongestionInfo,
congestion_info: Option<CongestionInfo>,
signer: &dyn ValidatorSigner,
) -> Self {
let inner = if ProtocolFeature::CongestionControl.enabled(protocol_version) {
let inner = if let Some(congestion_info) = congestion_info {
assert!(ProtocolFeature::CongestionControl.enabled(protocol_version));
ShardChunkHeaderInner::V3(ShardChunkHeaderInnerV3 {
prev_block_hash,
prev_state_root,
Expand Down Expand Up @@ -451,9 +452,13 @@ impl ShardChunkHeader {
SHARD_CHUNK_HEADER_UPGRADE_VERSION <= version && version < BLOCK_HEADER_V3_VERSION
}
ShardChunkHeader::V3(header) => match header.inner {
ShardChunkHeaderInner::V1(_) | ShardChunkHeaderInner::V2(_) => {
ShardChunkHeaderInner::V1(_) => {
version >= BLOCK_HEADER_V3_VERSION && version < CONGESTION_CONTROL_VERSION
}
// Note that we allow V2 in the congestion control version.
// That is because the first chunk where this feature is
// enabled does not have the congestion info.
ShardChunkHeaderInner::V2(_) => version >= BLOCK_HEADER_V3_VERSION,
ShardChunkHeaderInner::V3(_) => version >= CONGESTION_CONTROL_VERSION,
},
}
Expand Down Expand Up @@ -1031,7 +1036,7 @@ impl EncodedShardChunk {
transactions: Vec<SignedTransaction>,
prev_outgoing_receipts: &[Receipt],
prev_outgoing_receipts_root: CryptoHash,
congestion_info: CongestionInfo,
congestion_info: Option<CongestionInfo>,
signer: &dyn ValidatorSigner,
protocol_version: ProtocolVersion,
) -> Result<(Self, Vec<MerklePath>), std::io::Error> {
Expand Down
8 changes: 6 additions & 2 deletions core/primitives/src/stateless_validation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use bytesize::ByteSize;
use near_crypto::{PublicKey, Signature};
use near_primitives_core::hash::CryptoHash;
use near_primitives_core::types::{AccountId, Balance, BlockHeight, ShardId};
use near_primitives_core::version::PROTOCOL_VERSION;
use near_primitives_core::version::{ProtocolFeature, PROTOCOL_VERSION};

// The value here is the same as NETWORK_MESSAGE_MAX_SIZE_BYTES.
pub const MAX_CHUNK_STATE_WITNESS_SIZE: ByteSize = ByteSize::mib(512);
Expand Down Expand Up @@ -353,6 +353,10 @@ impl ChunkStateWitness {
}

pub fn new_dummy(height: BlockHeight, shard_id: ShardId, prev_block_hash: CryptoHash) -> Self {
let congestion_info = ProtocolFeature::CongestionControl
.enabled(PROTOCOL_VERSION)
.then_some(CongestionInfo::default());

let header = ShardChunkHeader::V3(ShardChunkHeaderV3::new(
PROTOCOL_VERSION,
prev_block_hash,
Expand All @@ -368,7 +372,7 @@ impl ChunkStateWitness {
Default::default(),
Default::default(),
Default::default(),
CongestionInfo::default(),
congestion_info,
&EmptyValidatorSigner::default(),
));
Self::new(
Expand Down
Loading

0 comments on commit 6038222

Please sign in to comment.