Skip to content

Commit

Permalink
feat(node): new observability bottom up tracing/metrics (#1061)
Browse files Browse the repository at this point in the history
  • Loading branch information
karlem authored Jul 22, 2024
1 parent 6aaa95c commit ed437af
Show file tree
Hide file tree
Showing 19 changed files with 323 additions and 57 deletions.
5 changes: 5 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 7 additions & 3 deletions fendermint/app/src/cmd/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use fendermint_crypto::SecretKey;
use fendermint_rocksdb::{blockstore::NamespaceBlockstore, namespaces, RocksDb, RocksDbConfig};
use fendermint_vm_actor_interface::eam::EthAddress;
use fendermint_vm_interpreter::chain::ChainEnv;
use fendermint_vm_interpreter::fvm::observe::register_metrics as register_interpreter_metrics;
use fendermint_vm_interpreter::fvm::upgrades::UpgradeScheduler;
use fendermint_vm_interpreter::{
bytes::{BytesMessageInterpreter, ProposalPrepareMode},
Expand All @@ -27,6 +28,7 @@ use fendermint_vm_topdown::voting::{publish_vote_loop, Error as VoteError, VoteT
use fendermint_vm_topdown::{CachedFinalityProvider, IPCParentFinality, Toggle};
use fvm_shared::address::{current_network, Address, Network};
use ipc_ipld_resolver::{Event as ResolverEvent, VoteRecord};
use ipc_observability::observe::register_metrics as register_default_metrics;
use ipc_provider::config::subnet::{EVMSubnet, SubnetConfig};
use ipc_provider::IpcProvider;
use libp2p::identity::secp256k1;
Expand All @@ -38,6 +40,7 @@ use tracing::info;

use crate::cmd::key::read_secret_key;
use crate::{cmd, options::run::RunArgs, settings::Settings};
use fendermint_app::observe::register_metrics as register_consensus_metrics;

cmd! {
RunArgs(self, settings) {
Expand Down Expand Up @@ -70,10 +73,11 @@ async fn run(settings: Settings) -> anyhow::Result<()> {
let metrics_registry = if settings.metrics.enabled {
let registry = prometheus::Registry::new();

register_default_metrics(&registry).context("failed to register default metrics")?;
register_topdown_metrics(&registry).context("failed to register topdown metrics")?;

fendermint_app::metrics::register_app_metrics(&registry)
.context("failed to register metrics")?;
register_interpreter_metrics(&registry)
.context("failed to register interpreter metrics")?;
register_consensus_metrics(&registry).context("failed to register consensus metrics")?;

Some(registry)
} else {
Expand Down
1 change: 0 additions & 1 deletion fendermint/app/src/metrics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,4 @@

mod prometheus;

pub use prometheus::app::register_metrics as register_app_metrics;
pub use prometheus::eth::register_metrics as register_eth_metrics;
41 changes: 1 addition & 40 deletions fendermint/app/src/metrics/prometheus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,9 @@
// SPDX-License-Identifier: Apache-2.0, MIT
//! Prometheus metrics

// Declares a group of Prometheus metrics plus a function that registers all of them.
//
// Input: a `;`-separated list of `NAME: Type = "description"` entries; a trailing `;` is allowed.
// The macro refers to `paste!`, `lazy_static!` and `Registry` unqualified, so the invoking
// scope must import them.
macro_rules! metrics {
($($name:ident : $type:ty = $desc:literal);* $(;)?) => {
$(
paste! {
lazy_static! {
// One public static per entry. The metric is constructed from the identifier run through
// paste's `:lower` modifier and the description literal; `.unwrap()` panics if
// construction fails instead of surfacing an error.
pub static ref $name: $type = $type::new(stringify!([< $name:lower >]), $desc).unwrap();
}
}
)*

// Registers a clone of every metric declared above with `registry`, stopping at the
// first registration error.
pub fn register_metrics(registry: &Registry) -> anyhow::Result<()> {
$(registry.register(Box::new($name.clone()))?;)*
Ok(())
}
};
}

/// Metrics emitted by Fendermint.
pub mod app {
// These imports are required by the expansion of `metrics!`, which uses them unqualified.
use lazy_static::lazy_static;
use paste::paste;
use prometheus::{IntCounter, IntGauge, Registry};

// Expands to one static per entry below and a `register_metrics` function for this module.
metrics! {
BOTTOMUP_CKPT_BLOCK_HEIGHT: IntGauge = "Highest bottom-up checkpoint created";
BOTTOMUP_CKPT_CONFIG_NUM: IntGauge = "Highest configuration number checkpointed";
BOTTOMUP_CKPT_NUM_MSGS: IntCounter = "Number of bottom-up messages observed since start";

// This metric is available in CometBFT as well, but it's something that should increase even without subnets,
// which can be a useful way to check if metrics work at all.
ABCI_COMMITTED_BLOCK_HEIGHT: IntGauge = "Highest committed block";
}
}

/// Metrics emitted by the Ethereum API facade.
pub mod eth {
// TODO - migrate these metrics to new observability architecture
use fendermint_eth_api::apis::RPC_METHOD_CALL_LATENCY_SECONDS;

pub fn register_metrics(registry: &prometheus::Registry) -> anyhow::Result<()> {
Expand All @@ -48,12 +15,6 @@ pub mod eth {

#[cfg(test)]
mod tests {
#[test]
fn can_register_app_metrics() {
let r = prometheus::Registry::new();
super::app::register_metrics(&r).unwrap();
}

#[test]
fn can_register_eth_metrics() {
let r = prometheus::Registry::new();
Expand Down
55 changes: 48 additions & 7 deletions fendermint/vm/interpreter/src/fvm/checkpoint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,18 @@ use fvm_shared::{address::Address, chainid::ChainID};

use fendermint_crypto::PublicKey;
use fendermint_crypto::SecretKey;
use fendermint_tracing::emit;
use fendermint_vm_actor_interface::eam::EthAddress;
use fendermint_vm_actor_interface::ipc::BottomUpCheckpoint;
use fendermint_vm_event::NewBottomUpCheckpoint;
use fendermint_vm_genesis::{Power, Validator, ValidatorKey};

use ipc_actors_abis::checkpointing_facet as checkpoint;
use ipc_actors_abis::gateway_getter_facet as getter;
use ipc_api::staking::ConfigurationNumber;
use ipc_observability::{emit, serde::HexEncodableBlockHash};

use super::observe::{
CheckpointCreated, CheckpointFinalized, CheckpointSigned, CheckpointSignedRole,
};
use super::state::ipc::tokens_to_burn;
use super::{
broadcast::Broadcaster,
Expand Down Expand Up @@ -121,11 +123,11 @@ where
power_diff(curr_power_table, next_power_table)
};

emit!(NewBottomUpCheckpoint {
block_height: height.value(),
block_hash: &hex::encode(block_hash),
num_msgs,
next_configuration_number,
emit(CheckpointCreated {
height: height.value(),
hash: HexEncodableBlockHash(block_hash.to_vec()),
msg_count: num_msgs,
config_number: next_configuration_number,
});

Ok(Some((checkpoint, power_updates)))
Expand Down Expand Up @@ -255,6 +257,13 @@ where
.await
.context("failed to broadcast checkpoint signature")?;

emit(CheckpointSigned {
role: CheckpointSignedRole::Own,
height: height.value(),
hash: HexEncodableBlockHash(cp.block_hash.to_vec()),
validator: validator_ctx.public_key,
});

tracing::debug!(?height, "submitted checkpoint signature");
}
}
Expand Down Expand Up @@ -290,6 +299,38 @@ where
Ok(())
}

/// Emits a `CheckpointFinalized` trace event when the checkpoint at the current block
/// height has reached quorum.
///
/// Does nothing and returns `Ok(())` when the gateway is not enabled for `state`.
///
/// # Errors
///
/// Fails if the gateway queries fail, or with "block hash not set" when `state` has no
/// block hash.
pub fn emit_trace_if_check_checkpoint_finalized<DB>(
    gateway: &GatewayCaller<DB>,
    state: &mut FvmExecState<DB>,
) -> anyhow::Result<()>
where
    DB: Blockstore + Clone,
{
    let enabled = gateway.enabled(state)?;

    if enabled {
        let height = state.block_height();

        // The hash is resolved before querying the gateway, so a missing hash is reported
        // without touching the checkpoint info.
        let Some(current_hash) = state.block_hash() else {
            return Err(anyhow!("block hash not set"));
        };

        // If no checkpoint was emitted at this height, the QuorumInfo struct will carry
        // zero values, including reached=false, so nothing is emitted in that case.
        let quorum = gateway.checkpoint_info(state, height)?;

        if quorum.reached {
            let hash = HexEncodableBlockHash(current_hash.to_vec());
            emit(CheckpointFinalized { height, hash });
        }
    }

    Ok(())
}

fn convert_tokenizables<Source: Tokenizable, Target: Tokenizable>(
tokenizables: Vec<Source>,
) -> anyhow::Result<Vec<Target>> {
Expand Down
13 changes: 11 additions & 2 deletions fendermint/vm/interpreter/src/fvm/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ use fendermint_vm_actor_interface::{chainmetadata, cron, system};
use fvm::executor::ApplyRet;
use fvm_ipld_blockstore::Blockstore;
use fvm_shared::{address::Address, ActorID, MethodNum, BLOCK_GAS_LIMIT};
use ipc_observability::{emit, measure_time};
use ipc_observability::{emit, measure_time, observe::TracingError, Traceable};
use tendermint_rpc::Client;

use crate::ExecInterpreter;

use super::{
checkpoint::{self, PowerUpdates},
observe::{MsgExec, MsgExecPurpose},
observe::{CheckpointFinalized, MsgExec, MsgExecPurpose},
state::FvmExecState,
FvmMessage, FvmMessageInterpreter,
};
Expand Down Expand Up @@ -186,6 +186,15 @@ where
}

async fn end(&self, mut state: Self::State) -> anyhow::Result<(Self::State, Self::EndOutput)> {
// TODO: Consider doing this async, since it's purely informational and not consensus-critical.
let _ = checkpoint::emit_trace_if_check_checkpoint_finalized(&self.gateway, &mut state)
.inspect_err(|e| {
emit(TracingError {
affected_event: CheckpointFinalized::name(),
reason: e.to_string(),
});
});

let updates = if let Some((checkpoint, updates)) =
checkpoint::maybe_create_checkpoint(&self.gateway, &mut state)
.context("failed to create checkpoint")?
Expand Down
2 changes: 1 addition & 1 deletion fendermint/vm/interpreter/src/fvm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ mod checkpoint;
mod exec;
mod externs;
mod genesis;
mod observe;
pub mod observe;
mod query;
pub mod state;
pub mod store;
Expand Down
Loading

0 comments on commit ed437af

Please sign in to comment.