Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(node): new observability bottom up tracing/metrics #1061

Merged
merged 40 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
21baf8b
feat: introduce metrics crate
karlem Jul 4, 2024
4d0905e
feat: improve library with macros, add top down events
karlem Jul 5, 2024
b20f487
feat: emit metrics
karlem Jul 8, 2024
52f195c
feat: add remaining top down events
karlem Jul 9, 2024
e0cb9fc
feat: remove unused code & fix build
karlem Jul 9, 2024
5ca78d7
feat: address comments
karlem Jul 10, 2024
9500325
feat: use latency wrapper instead
karlem Jul 10, 2024
ff3c09c
feat: add config filters
karlem Jul 11, 2024
5111bb4
feat: add config as a cmd flags
karlem Jul 12, 2024
8ca34f1
feat: fix comments
karlem Jul 15, 2024
6a8f949
feat: add consensus traces
karlem Jul 12, 2024
6ee97b5
feat: add execution metrics
karlem Jul 12, 2024
4085531
feat: add proposals metrics & reason
karlem Jul 15, 2024
313ff71
feat: add mempool event
karlem Jul 16, 2024
f91bfa1
feat: remove
karlem Jul 16, 2024
e801cdc
feat: address comments
karlem Jul 16, 2024
97ee818
feat: address comments
karlem Jul 16, 2024
6b12da4
feat: fix clippy
karlem Jul 16, 2024
2300c6e
minor cleanup.
raulk Jul 16, 2024
66d8daf
feat: fix typo
karlem Jul 16, 2024
48b4a08
feat: standartize mpool trace
karlem Jul 16, 2024
d1248b7
feat: address comments
karlem Jul 18, 2024
1c77f3a
lint: clippy
karlem Jul 18, 2024
e94d790
feat: add observe events & metrics to checkpoint
karlem Jul 9, 2024
a91ff7a
feat: improve hash
karlem Jul 10, 2024
5e85885
feat: trigger last event
karlem Jul 12, 2024
60ecd3b
feat: add log to releayer
karlem Jul 15, 2024
5a2c6f8
feat: add to relayer
karlem Jul 15, 2024
7faf328
feat: fix clippy
karlem Jul 15, 2024
92fa8b2
feat: remove unused code
karlem Jul 15, 2024
af0ff70
feat: emit checkpoint finalized from fvm & cleanup after merge
karlem Jul 17, 2024
6ddaf9b
fix: remove dead code
karlem Jul 18, 2024
0794939
feat: change public key
karlem Jul 18, 2024
8b938d4
feat: update main
karlem Jul 19, 2024
7431138
fix: finish rebase
karlem Jul 19, 2024
02f68ef
feat: address PR commnets
karlem Jul 19, 2024
7803cc2
feat: rename event
karlem Jul 19, 2024
27b1a67
rename events: CheckpointFinalized + CheckpointSubmitted.
raulk Jul 22, 2024
e66910b
add comments.
raulk Jul 22, 2024
43df0a9
cargo fmt.
raulk Jul 22, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 7 additions & 3 deletions fendermint/app/src/cmd/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use fendermint_crypto::SecretKey;
use fendermint_rocksdb::{blockstore::NamespaceBlockstore, namespaces, RocksDb, RocksDbConfig};
use fendermint_vm_actor_interface::eam::EthAddress;
use fendermint_vm_interpreter::chain::ChainEnv;
use fendermint_vm_interpreter::fvm::observe::register_metrics as register_interpreter_metrics;
use fendermint_vm_interpreter::fvm::upgrades::UpgradeScheduler;
use fendermint_vm_interpreter::{
bytes::{BytesMessageInterpreter, ProposalPrepareMode},
Expand All @@ -27,6 +28,7 @@ use fendermint_vm_topdown::voting::{publish_vote_loop, Error as VoteError, VoteT
use fendermint_vm_topdown::{CachedFinalityProvider, IPCParentFinality, Toggle};
use fvm_shared::address::{current_network, Address, Network};
use ipc_ipld_resolver::{Event as ResolverEvent, VoteRecord};
use ipc_observability::observe::register_metrics as register_default_metrics;
use ipc_provider::config::subnet::{EVMSubnet, SubnetConfig};
use ipc_provider::IpcProvider;
use libp2p::identity::secp256k1;
Expand All @@ -38,6 +40,7 @@ use tracing::info;

use crate::cmd::key::read_secret_key;
use crate::{cmd, options::run::RunArgs, settings::Settings};
use fendermint_app::observe::register_metrics as register_consensus_metrics;

cmd! {
RunArgs(self, settings) {
Expand Down Expand Up @@ -70,10 +73,11 @@ async fn run(settings: Settings) -> anyhow::Result<()> {
let metrics_registry = if settings.metrics.enabled {
let registry = prometheus::Registry::new();

register_default_metrics(&registry).context("failed to register default metrics")?;
register_topdown_metrics(&registry).context("failed to register topdown metrics")?;

fendermint_app::metrics::register_app_metrics(&registry)
.context("failed to register metrics")?;
register_interpreter_metrics(&registry)
.context("failed to register interpreter metrics")?;
register_consensus_metrics(&registry).context("failed to register consensus metrics")?;

Some(registry)
} else {
Expand Down
1 change: 0 additions & 1 deletion fendermint/app/src/metrics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,4 @@

mod prometheus;

pub use prometheus::app::register_metrics as register_app_metrics;
pub use prometheus::eth::register_metrics as register_eth_metrics;
41 changes: 1 addition & 40 deletions fendermint/app/src/metrics/prometheus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,9 @@
// SPDX-License-Identifier: Apache-2.0, MIT
//! Prometheus metrics

// Declares a set of Prometheus metrics and a function that registers all of them.
//
// Each entry has the form `NAME: Type = "description";`. For every entry the macro emits a
// `lazy_static!` `pub static ref NAME: Type`, built with `Type::new(<name>, <description>)`,
// where `<name>` is the identifier lower-cased via `paste!`'s `[< $name:lower >]`.
// The constructor result is unwrapped, so a failing `new` panics.
//
// It also emits `register_metrics(registry)`, which registers a boxed clone of every declared
// metric and returns the first registration error through `?`.
//
// The expansion refers to `paste!`, `lazy_static!` and `Registry` unqualified, so the invoking
// scope has to bring them in.
macro_rules! metrics {
($($name:ident : $type:ty = $desc:literal);* $(;)?) => {
$(
paste! {
lazy_static! {
pub static ref $name: $type = $type::new(stringify!([< $name:lower >]), $desc).unwrap();
}
}
)*

pub fn register_metrics(registry: &Registry) -> anyhow::Result<()> {
$(registry.register(Box::new($name.clone()))?;)*
Ok(())
}
};
}

/// Metrics emitted by Fendermint.
///
/// The statics and the `register_metrics` function in this module are generated by the
/// `metrics!` invocation below.
pub mod app {
// `metrics!` expands to code that uses these names unqualified.
use lazy_static::lazy_static;
use paste::paste;
use prometheus::{IntCounter, IntGauge, Registry};

metrics! {
BOTTOMUP_CKPT_BLOCK_HEIGHT: IntGauge = "Highest bottom-up checkpoint created";
BOTTOMUP_CKPT_CONFIG_NUM: IntGauge = "Highest configuration number checkpointed";
BOTTOMUP_CKPT_NUM_MSGS: IntCounter = "Number of bottom-up messages observed since start";

// This metric is available in CometBFT as well, but it's something that should increase even without subnets,
// which can be a useful way to check if metrics work at all.
ABCI_COMMITTED_BLOCK_HEIGHT: IntGauge = "Highest committed block";
}
}

/// Metrics emitted by the Ethereum API facade.
pub mod eth {
// TODO - migrate these metrics to new observability architecture
use fendermint_eth_api::apis::RPC_METHOD_CALL_LATENCY_SECONDS;

pub fn register_metrics(registry: &prometheus::Registry) -> anyhow::Result<()> {
Expand All @@ -48,12 +15,6 @@ pub mod eth {

#[cfg(test)]
mod tests {
#[test]
fn can_register_app_metrics() {
let r = prometheus::Registry::new();
super::app::register_metrics(&r).unwrap();
}

#[test]
fn can_register_eth_metrics() {
let r = prometheus::Registry::new();
Expand Down
55 changes: 48 additions & 7 deletions fendermint/vm/interpreter/src/fvm/checkpoint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,18 @@ use fvm_shared::{address::Address, chainid::ChainID};

use fendermint_crypto::PublicKey;
use fendermint_crypto::SecretKey;
use fendermint_tracing::emit;
use fendermint_vm_actor_interface::eam::EthAddress;
use fendermint_vm_actor_interface::ipc::BottomUpCheckpoint;
use fendermint_vm_event::NewBottomUpCheckpoint;
use fendermint_vm_genesis::{Power, Validator, ValidatorKey};

use ipc_actors_abis::checkpointing_facet as checkpoint;
use ipc_actors_abis::gateway_getter_facet as getter;
use ipc_api::staking::ConfigurationNumber;
use ipc_observability::{emit, serde::HexEncodableBlockHash};

use super::observe::{
CheckpointCreated, CheckpointFinalized, CheckpointSigned, CheckpointSignedRole,
};
use super::state::ipc::tokens_to_burn;
use super::{
broadcast::Broadcaster,
Expand Down Expand Up @@ -121,11 +123,11 @@ where
power_diff(curr_power_table, next_power_table)
};

emit!(NewBottomUpCheckpoint {
block_height: height.value(),
block_hash: &hex::encode(block_hash),
num_msgs,
next_configuration_number,
emit(CheckpointCreated {
height: height.value(),
hash: HexEncodableBlockHash(block_hash.to_vec()),
msg_count: num_msgs,
config_number: next_configuration_number,
});

Ok(Some((checkpoint, power_updates)))
Expand Down Expand Up @@ -255,6 +257,13 @@ where
.await
.context("failed to broadcast checkpoint signature")?;

emit(CheckpointSigned {
role: CheckpointSignedRole::Own,
height: height.value(),
hash: HexEncodableBlockHash(cp.block_hash.to_vec()),
validator: validator_ctx.public_key,
});

tracing::debug!(?height, "submitted checkpoint signature");
}
}
Expand Down Expand Up @@ -290,6 +299,38 @@ where
Ok(())
}

/// Emits a `CheckpointFinalized` trace event if the checkpoint looked up at the current block
/// height reports that quorum has been reached.
///
/// Returns `Ok(())` without emitting anything when the gateway is not enabled or when quorum
/// has not been reached.
///
/// # Errors
///
/// Propagates failures from the gateway calls, and fails with "block hash not set" when the
/// execution state carries no block hash.
pub fn emit_trace_if_check_checkpoint_finalized<DB>(
    gateway: &GatewayCaller<DB>,
    state: &mut FvmExecState<DB>,
) -> anyhow::Result<()>
where
    DB: Blockstore + Clone,
{
    let gateway_enabled = gateway.enabled(state)?;
    if !gateway_enabled {
        return Ok(());
    }

    let height = state.block_height();
    let Some(hash) = state.block_hash() else {
        return Err(anyhow!("block hash not set"));
    };

    // Look up the quorum information for this height.
    // If no checkpoint was emitted at this height, the QuorumInfo struct will carry zero values,
    // including reached=false, so nothing is emitted in that case.
    let quorum = gateway.checkpoint_info(state, height)?;
    if !quorum.reached {
        return Ok(());
    }

    emit(CheckpointFinalized {
        height,
        hash: HexEncodableBlockHash(hash.to_vec()),
    });

    Ok(())
}

fn convert_tokenizables<Source: Tokenizable, Target: Tokenizable>(
tokenizables: Vec<Source>,
) -> anyhow::Result<Vec<Target>> {
Expand Down
13 changes: 11 additions & 2 deletions fendermint/vm/interpreter/src/fvm/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ use fendermint_vm_actor_interface::{chainmetadata, cron, system};
use fvm::executor::ApplyRet;
use fvm_ipld_blockstore::Blockstore;
use fvm_shared::{address::Address, ActorID, MethodNum, BLOCK_GAS_LIMIT};
use ipc_observability::{emit, measure_time};
use ipc_observability::{emit, measure_time, observe::TracingError, Traceable};
use tendermint_rpc::Client;

use crate::ExecInterpreter;

use super::{
checkpoint::{self, PowerUpdates},
observe::{MsgExec, MsgExecPurpose},
observe::{CheckpointFinalized, MsgExec, MsgExecPurpose},
state::FvmExecState,
FvmMessage, FvmMessageInterpreter,
};
Expand Down Expand Up @@ -186,6 +186,15 @@ where
}

async fn end(&self, mut state: Self::State) -> anyhow::Result<(Self::State, Self::EndOutput)> {
// TODO: Consider doing this async, since it's purely informational and not consensus-critical.
let _ = checkpoint::emit_trace_if_check_checkpoint_finalized(&self.gateway, &mut state)
.inspect_err(|e| {
emit(TracingError {
affected_event: CheckpointFinalized::name(),
reason: e.to_string(),
});
});

let updates = if let Some((checkpoint, updates)) =
checkpoint::maybe_create_checkpoint(&self.gateway, &mut state)
.context("failed to create checkpoint")?
Expand Down
2 changes: 1 addition & 1 deletion fendermint/vm/interpreter/src/fvm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ mod checkpoint;
mod exec;
mod externs;
mod genesis;
mod observe;
pub mod observe;
mod query;
pub mod state;
pub mod store;
Expand Down
Loading
Loading