Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: apply log_summary_style to state sync phase message #8735

Merged
merged 8 commits into from
Mar 21, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 30 additions & 31 deletions chain/client/src/client.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,16 @@
//! Client is responsible for tracking the chain, chunks, and producing them when needed.
//! This client works completely synchronously and must be operated by some async actor outside.

use std::cmp::max;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::time::{Duration, Instant};

use crate::adapter::ProcessTxResponse;
use crate::debug::BlockProductionTracker;
use crate::debug::PRODUCTION_TIMES_CACHE_SIZE;
use crate::sync::block::BlockSync;
use crate::sync::epoch::EpochSync;
use crate::sync::header::HeaderSync;
use crate::sync::state::{StateSync, StateSyncResult};
use crate::{metrics, SyncStatus};
use lru::LruCache;
use near_async::messaging::{CanSend, Sender};
use near_chunks::adapter::ShardsManagerRequestFromClient;
use near_chunks::client::ShardedTransactionPool;
use near_chunks::logic::{
cares_about_shard_this_or_next_epoch, decode_encoded_chunk, persist_chunk,
};
use near_client_primitives::debug::ChunkProduction;
use near_primitives::static_clock::StaticClock;
use near_store::metadata::DbKind;
use tracing::{debug, error, info, trace, warn};

use near_chain::chain::{
ApplyStatePartsRequest, BlockCatchUpRequest, BlockMissingChunks, BlocksCatchUpState,
OrphanMissingChunks, StateSplitRequest, TX_ROUTING_HEIGHT_HORIZON,
Expand All @@ -30,43 +23,47 @@ use near_chain::{
DoneApplyChunkCallback, Doomslug, DoomslugThresholdMode, Provenance,
RuntimeWithEpochManagerAdapter,
};
use near_chain_configs::{ClientConfig, UpdateableClientConfig};
use near_chain_configs::{ClientConfig, LogSummaryStyle, UpdateableClientConfig};
use near_chunks::adapter::ShardsManagerRequestFromClient;
use near_chunks::client::ShardedTransactionPool;
use near_chunks::logic::{
cares_about_shard_this_or_next_epoch, decode_encoded_chunk, persist_chunk,
};
use near_chunks::ShardsManager;
use near_client_primitives::debug::ChunkProduction;
use near_client_primitives::types::{Error, ShardSyncDownload, ShardSyncStatus};
use near_network::types::{AccountKeys, ChainInfo, PeerManagerMessageRequest, SetChainInfo};
use near_network::types::{
HighestHeightPeerInfo, NetworkRequests, PeerManagerAdapter, ReasonForBan,
};
use near_o11y::log_assert;
use near_primitives::block::{Approval, ApprovalInner, ApprovalMessage, Block, BlockHeader, Tip};
use near_primitives::block_header::ApprovalType;
use near_primitives::challenge::{Challenge, ChallengeBody};
use near_primitives::epoch_manager::RngSeed;
use near_primitives::hash::CryptoHash;
use near_primitives::merkle::{merklize, MerklePath, PartialMerkleTree};
use near_primitives::network::PeerId;
use near_primitives::receipt::Receipt;
use near_primitives::sharding::{
ChunkHash, EncodedShardChunk, PartialEncodedChunk, ReedSolomonWrapper, ShardChunk,
ShardChunkHeader, ShardInfo,
};
use near_primitives::static_clock::StaticClock;
use near_primitives::transaction::SignedTransaction;
use near_primitives::types::chunk_extra::ChunkExtra;
use near_primitives::types::{AccountId, ApprovalStake, BlockHeight, EpochId, NumBlocks, ShardId};
use near_primitives::unwrap_or_return;
use near_primitives::utils::MaybeValidated;
use near_primitives::validator_signer::ValidatorSigner;

use crate::adapter::ProcessTxResponse;
use crate::debug::BlockProductionTracker;
use crate::debug::PRODUCTION_TIMES_CACHE_SIZE;
use crate::sync::block::BlockSync;
use crate::sync::epoch::EpochSync;
use crate::sync::header::HeaderSync;
use crate::sync::state::{StateSync, StateSyncResult};
use crate::{metrics, SyncStatus};
use near_client_primitives::types::{Error, ShardSyncDownload, ShardSyncStatus};
use near_network::types::{AccountKeys, ChainInfo, PeerManagerMessageRequest, SetChainInfo};
use near_o11y::log_assert;
use near_primitives::block_header::ApprovalType;
use near_primitives::epoch_manager::RngSeed;
use near_primitives::network::PeerId;
use near_primitives::version::PROTOCOL_VERSION;
use near_primitives::views::{CatchupStatusView, DroppedReason};
use near_store::metadata::DbKind;
use std::cmp::max;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::time::{Duration, Instant};
use tracing::{debug, error, info, trace, warn};

const NUM_REBROADCAST_BLOCKS: usize = 30;
const CHUNK_HEADERS_FOR_INCLUSION_CACHE_SIZE: usize = 2048;
Expand Down Expand Up @@ -2126,6 +2123,7 @@ impl Client {
"Catchup me: {:?}: sync_hash: {:?}, sync_info: {:?}", me, sync_hash, new_shard_sync
);

let use_colour = matches!(self.config.log_summary_style, LogSummaryStyle::Colored);
match state_sync.run(
me,
sync_hash,
Expand All @@ -2136,6 +2134,7 @@ impl Client {
state_sync_info.shards.iter().map(|tuple| tuple.0).collect(),
state_parts_task_scheduler,
state_split_scheduler,
use_colour,
)? {
StateSyncResult::Unchanged => {}
StateSyncResult::Changed(fetch_block) => {
Expand Down
5 changes: 4 additions & 1 deletion chain/client/src/client_actor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use near_chain::{
byzantine_assert, near_chain_primitives, Block, BlockHeader, BlockProcessingArtifact,
ChainGenesis, DoneApplyChunkCallback, Provenance, RuntimeWithEpochManagerAdapter,
};
use near_chain_configs::ClientConfig;
use near_chain_configs::{ClientConfig, LogSummaryStyle};
use near_chunks::adapter::ShardsManagerRequestFromClient;
use near_chunks::client::ShardsManagerResponse;
use near_chunks::logic::cares_about_shard_this_or_next_epoch;
Expand Down Expand Up @@ -1619,6 +1619,8 @@ impl ClientActor {
unwrap_and_report!(self.client.chain.reset_data_pre_state_sync(sync_hash));
}

let use_colour =
matches!(self.client.config.log_summary_style, LogSummaryStyle::Colored);
match unwrap_and_report!(self.client.state_sync.run(
&me,
sync_hash,
Expand All @@ -1629,6 +1631,7 @@ impl ClientActor {
shards_to_sync,
&self.state_parts_task_scheduler,
&self.state_split_scheduler,
use_colour,
)) {
StateSyncResult::Unchanged => (),
StateSyncResult::Changed(fetch_block) => {
Expand Down
67 changes: 46 additions & 21 deletions chain/client/src/sync/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
//!

use ansi_term::Color::{Purple, Yellow};
use ansi_term::Style;
use chrono::{DateTime, Duration, Utc};
use futures::{future, FutureExt};
use near_async::messaging::CanSendAsync;
Expand Down Expand Up @@ -176,6 +177,7 @@ impl StateSync {
now: DateTime<Utc>,
state_parts_task_scheduler: &dyn Fn(ApplyStatePartsRequest),
state_split_scheduler: &dyn Fn(StateSplitRequest),
use_colour: bool,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Somewhat unfortunate that we must thread this through like that…

) -> Result<(bool, bool), near_chain::Error> {
let mut all_done = true;
let mut update_sync_status = false;
Expand Down Expand Up @@ -276,30 +278,12 @@ impl StateSync {
%shard_id,
timeout_sec = self.timeout.num_seconds(),
"State sync didn't download the state, sending StateRequest again");
tracing::info!(target: "sync",
tracing::info!(
target: "sync",
%shard_id,
%sync_hash,
?me,
phase = ?match shard_sync_download.status {
ShardSyncStatus::StateDownloadHeader => format!("{} requests sent {}, last target {:?}",
Purple.bold().paint("HEADER".to_string()),
shard_sync_download.downloads[0].state_requests_count,
shard_sync_download.downloads[0].last_target),
ShardSyncStatus::StateDownloadParts => { let mut text = "".to_string();
for (i, download) in shard_sync_download.downloads.iter().enumerate() {
text.push_str(&format!("[{}: {}, {}, {:?}] ",
Yellow.bold().paint(i.to_string()),
download.done,
download.state_requests_count,
download.last_target));
}
format!("{} [{}: is_done, requests sent, last target] {}",
Purple.bold().paint("PARTS"),
Yellow.bold().paint("part_id"),
text)
}
_ => unreachable!("timeout cannot happen when all state is downloaded"),
},
phase = format_shard_sync_phase(&shard_sync_download, use_colour),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this would be better served by splitting the phase up into different tracing KVs? for example state_requests_count and last_target could both easily be KVs I think 🤔

Though adding colors to that would be even harder.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm worried about the StateDownloadParts branch. Some shards have a lot of shards, which results in huge log messages. I definitely don't want to export 50k tracing KVs from a single span.
At the moment, this way-too-verbose logging is still useful for development of State Sync. Currently, I'm leaning to refactoring the code and keeping the logging.

I'll consider simplifying the log line in the future. Coutning the number of done/not-done parts should be useful enough in most cases.

"State sync status");
}

Expand Down Expand Up @@ -618,6 +602,7 @@ impl StateSync {
tracking_shards: Vec<ShardId>,
state_parts_task_scheduler: &dyn Fn(ApplyStatePartsRequest),
state_split_scheduler: &dyn Fn(StateSplitRequest),
use_colour: bool,
) -> Result<StateSyncResult, near_chain::Error> {
let _span = tracing::debug_span!(target: "sync", "run", sync = "StateSync").entered();
tracing::debug!(target: "sync", %sync_hash, ?tracking_shards, "syncing state");
Expand Down Expand Up @@ -651,6 +636,7 @@ impl StateSync {
now,
state_parts_task_scheduler,
state_split_scheduler,
use_colour,
)?;

if have_block && all_done {
Expand Down Expand Up @@ -973,6 +959,45 @@ impl StateSync {
}
}

fn paint(s: &str, colour: Style, use_colour: bool) -> String {
if use_colour {
colour.paint(s).to_string()
} else {
s.to_string()
}
}

/// Formats the given ShardSyncDownload for logging.
fn format_shard_sync_phase(shard_sync_download: &ShardSyncDownload, use_colour: bool) -> String {
match shard_sync_download.status {
ShardSyncStatus::StateDownloadHeader => format!(
"{} requests sent {}, last target {:?}",
paint("HEADER", Purple.bold(), use_colour),
shard_sync_download.downloads[0].state_requests_count,
shard_sync_download.downloads[0].last_target
),
ShardSyncStatus::StateDownloadParts => {
let mut text = "".to_string();
for (i, download) in shard_sync_download.downloads.iter().enumerate() {
text.push_str(&format!(
"[{}: {}, {}, {:?}] ",
paint(&i.to_string(), Yellow.bold(), use_colour),
download.done,
download.state_requests_count,
download.last_target
));
}
format!(
"{} [{}: is_done, requests sent, last target] {}",
paint("PARTS", Purple.bold(), use_colour),
paint("part_id", Yellow.bold(), use_colour),
text
)
}
_ => unreachable!("timeout cannot happen when all state is downloaded"),
}
}

/// Create an abstract collection of elements to be shuffled.
/// Each element will appear in the shuffled output exactly `limit` times.
/// Use it as an iterator to access the shuffled collection.
Expand Down