From d20ad1688c6c5f7c6307b49794b680331e1a26bd Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Wed, 16 Aug 2023 07:28:28 -0500 Subject: [PATCH] GH-1359 Add speculative block metrics to prometheus --- .../eosio/producer_plugin/producer_plugin.hpp | 17 ++++- plugins/producer_plugin/producer_plugin.cpp | 61 ++++++++++------ plugins/prometheus_plugin/metrics.hpp | 71 +++++++++++++++++++ 3 files changed, 126 insertions(+), 23 deletions(-) diff --git a/plugins/producer_plugin/include/eosio/producer_plugin/producer_plugin.hpp b/plugins/producer_plugin/include/eosio/producer_plugin/producer_plugin.hpp index 46d25e75a9..f102dfcf0b 100644 --- a/plugins/producer_plugin/include/eosio/producer_plugin/producer_plugin.hpp +++ b/plugins/producer_plugin/include/eosio/producer_plugin/producer_plugin.hpp @@ -146,7 +146,21 @@ class producer_plugin : public appbase::plugin { static void set_test_mode(bool m) { test_mode_ = m; } - struct produced_block_metrics { + struct speculative_block_metrics { + account_name block_producer{}; + uint32_t block_num = 0; + int64_t block_total_time_us = 0; + int64_t block_idle_us = 0; + std::size_t num_success_trx = 0; + int64_t success_trx_time_us = 0; + std::size_t num_fail_trx = 0; + int64_t fail_trx_time_us = 0; + std::size_t num_transient_trx = 0; + int64_t transient_trx_time_us = 0; + int64_t block_other_time_us = 0; + }; + + struct produced_block_metrics : public speculative_block_metrics { std::size_t unapplied_transactions_total = 0; std::size_t blacklisted_transactions_total = 0; std::size_t subjective_bill_account_size_total = 0; @@ -174,6 +188,7 @@ class producer_plugin : public appbase::plugin { }; void register_update_produced_block_metrics(std::function&&); + void register_update_speculative_block_metrics(std::function&&); void register_update_incoming_block_metrics(std::function&&); inline static bool test_mode_{false}; // to be moved into appbase (application_base) diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index 3e3e7e5e1c..22a3864aac 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -17,8 +17,6 @@ #include #include -#include -#include #include #include #include @@ -307,11 +305,11 @@ struct block_time_tracker { last_time_point = now; } - void report(uint32_t block_num, account_name producer) { + void report(uint32_t block_num, account_name producer, producer_plugin::speculative_block_metrics& metrics) { using namespace std::string_literals; assert(!paused); + auto now = fc::time_point::now(); if( _log.is_enabled( fc::log_level::debug ) ) { - auto now = fc::time_point::now(); auto diff = now - clear_time_point - block_idle_time - trx_success_time - trx_fail_time - transient_trx_time - other_time; fc_dlog( _log, "Block #${n} ${p} trx idle: ${i}us out of ${t}us, success: ${sn}, ${s}us, fail: ${fn}, ${f}us, " "transient: ${ttn}, ${tt}us, other: ${o}us${rest}", @@ -321,6 +319,17 @@ struct block_time_tracker { ("ttn", transient_trx_num)("tt", transient_trx_time) ("o", other_time)("rest", diff.count() > 5 ? ", diff: "s + std::to_string(diff.count()) + "us"s : ""s ) ); } + metrics.block_producer = producer; + metrics.block_num = block_num; + metrics.block_total_time_us = (now - clear_time_point).count(); + metrics.block_idle_us = block_idle_time.count(); + metrics.num_success_trx = trx_success_num; + metrics.success_trx_time_us = trx_success_time.count(); + metrics.num_fail_trx = trx_fail_num; + metrics.fail_trx_time_us = trx_fail_time.count(); + metrics.num_transient_trx = transient_trx_num; + metrics.transient_trx_time_us = transient_trx_time.count(); + metrics.block_other_time_us = other_time.count(); } void clear() { @@ -573,6 +582,7 @@ class producer_plugin_impl : public std::enable_shared_from_this _update_produced_block_metrics; + std::function _update_speculative_block_metrics; std::function _update_incoming_block_metrics; // ro for read-only @@ -684,7 +694,10 @@ class producer_plugin_impl : public std::enable_shared_from_this().size(), - .trxs_produced_total = new_bs->block->transactions.size(), - .cpu_usage_us = br.total_cpu_usage_us, - .total_elapsed_time_us = br.total_elapsed_time.count(), - .total_time_us = br.total_time.count(), - .net_usage_us = br.total_net_usage, - .last_irreversible = chain.last_irreversible_block_num(), - .head_block_num = new_bs->block_num}); - } - ilog("Produced block ${id}... #${n} @ ${t} signed by ${p} " "[trxs: ${count}, lib: ${lib}, confirmed: ${confs}, net: ${net}, cpu: ${cpu}, elapsed: ${et}, time: ${tt}]", ("p", new_bs->header.producer)("id", new_bs->id.str().substr(8, 16))("n", new_bs->block_num)("t", new_bs->header.timestamp) @@ -2822,8 +2820,23 @@ void producer_plugin_impl::produce_block() { ("cpu", br.total_cpu_usage_us)("et", br.total_elapsed_time)("tt", br.total_time)("confs", new_bs->header.confirmed)); _time_tracker.add_other_time(); - _time_tracker.report(new_bs->block_num, new_bs->block->producer); + _time_tracker.report(new_bs->block_num, new_bs->block->producer, metrics); _time_tracker.clear(); + + if (_update_produced_block_metrics) { + metrics.unapplied_transactions_total = _unapplied_transactions.size(); + metrics.blacklisted_transactions_total = _blacklisted_transactions.size(); + metrics.subjective_bill_account_size_total = chain.get_subjective_billing().get_account_cache_size(); + metrics.scheduled_trxs_total = chain.db().get_index().size(); + metrics.trxs_produced_total = new_bs->block->transactions.size(); + metrics.cpu_usage_us = br.total_cpu_usage_us; + metrics.total_elapsed_time_us = br.total_elapsed_time.count(); + metrics.total_time_us = br.total_time.count(); + metrics.net_usage_us = br.total_net_usage; + metrics.last_irreversible = chain.last_irreversible_block_num(); + metrics.head_block_num = chain.head_block_num(); + _update_produced_block_metrics(metrics); + } } void producer_plugin::received_block(uint32_t block_num) { @@ -3057,6 +3070,10 @@ void producer_plugin::register_update_produced_block_metrics(std::function_update_produced_block_metrics = std::move(fun); } +void producer_plugin::register_update_speculative_block_metrics(std::function && fun) { + my->_update_speculative_block_metrics = std::move(fun); +} + void producer_plugin::register_update_incoming_block_metrics(std::function&& fun) { my->_update_incoming_block_metrics = std::move(fun); } diff --git a/plugins/prometheus_plugin/metrics.hpp b/plugins/prometheus_plugin/metrics.hpp index 9ea08e7839..7bfa1e9095 100644 --- a/plugins/prometheus_plugin/metrics.hpp +++ b/plugins/prometheus_plugin/metrics.hpp @@ -59,6 +59,28 @@ struct catalog_type { Counter& total_time_us_produced_block; Counter& net_usage_us_produced_block; Counter& blocks_produced; + Counter& block_total_time_us_produced_block; + Counter& block_idle_time_us_produced_block; + Counter& block_num_success_trx_produced_block; + Counter& block_success_trx_time_us_produced_block; + Counter& block_num_failed_trx_produced_block; + Counter& block_fail_trx_time_us_produced_block; + Counter& block_num_transient_trx_produced_block; + Counter& block_transient_trx_time_us_produced_block; + Counter& block_other_time_us_produced_block; + + // speculative blocks + Gauge& block_num; + Counter& blocks_speculative_num; + Counter& block_total_time_us_speculative_block; + Counter& block_idle_time_us_speculative_block; + Counter& block_num_success_trx_speculative_block; + Counter& block_success_trx_time_us_speculative_block; + Counter& block_num_failed_trx_speculative_block; + Counter& block_fail_trx_time_us_speculative_block; + Counter& block_num_transient_trx_speculative_block; + Counter& block_transient_trx_time_us_speculative_block; + Counter& block_other_time_us_speculative_block; // incoming blocks Counter& trxs_incoming_total; @@ -100,6 +122,26 @@ struct catalog_type { , total_time_us_produced_block(build("nodeos_produced_us_total", "total produced blocks total time")) , net_usage_us_produced_block(net_usage_us.Add({{"block_type", "produced"}})) , blocks_produced(build("blocks_produced", "number of blocks produced")) + , block_total_time_us_produced_block(build("total_time_us_produced_block", "total time for produced block")) + , block_idle_time_us_produced_block(build("idle_time_us_produced_block", "idle time for produced block")) + , block_num_success_trx_produced_block(build("num_success_trx_produced_block", "number of successful transactions in produced block")) + , block_success_trx_time_us_produced_block(build("success_trx_time_us_produced_block", "time for successful transactions in produced block")) + , block_num_failed_trx_produced_block(build("num_failed_trx_produced_block", "number of failed transactions during produced block")) + , block_fail_trx_time_us_produced_block(build("fail_trx_time_us_produced_block", "time for failed transactions during produced block")) + , block_num_transient_trx_produced_block(build("num_transient_trx_produced_block", "number of transient transactions during produced block")) + , block_transient_trx_time_us_produced_block(build("transient_trx_time_us_produced_block", "time for transient transactions during produced block")) + , block_other_time_us_produced_block(build("other_time_us_produced_block", "all other unaccounted time during produced block")) + , block_num(build("block_num", "current block number")) + , blocks_speculative_num(build("blocks_speculative_num", "number of speculative blocks created")) + , block_total_time_us_speculative_block(build("total_time_us_speculative_block", "total time for speculative block")) + , block_idle_time_us_speculative_block(build("idle_time_us_speculative_block", "idle time for speculative block")) + , block_num_success_trx_speculative_block(build("num_success_trx_speculative_block", "number of successful transactions in speculative block")) + , block_success_trx_time_us_speculative_block(build("success_trx_time_us_speculative_block", "time for successful transactions in speculative block")) + , block_num_failed_trx_speculative_block(build("num_failed_trx_speculative_block", "number of failed transactions during speculative block")) + , block_fail_trx_time_us_speculative_block(build("fail_trx_time_us_speculative_block", "time for failed transactions during speculative block")) + , block_num_transient_trx_speculative_block(build("num_transient_trx_speculative_block", "number of transient transactions during speculative block")) + , block_transient_trx_time_us_speculative_block(build("transient_trx_time_us_speculative_block", "time for transient transactions during speculative block")) + , block_other_time_us_speculative_block(build("other_time_us_speculative_block", "all other unaccounted time during speculative block")) , trxs_incoming_total(build("trxs_incoming_total", "number of incoming transactions")) , cpu_usage_us_incoming_block(cpu_usage_us.Add({{"block_type", "incoming"}})) , total_elapsed_time_us_incoming_block(build("nodeos_incoming_elapsed_us_total", "total incoming blocks elapsed time")) @@ -140,10 +182,35 @@ struct catalog_type { total_time_us_produced_block.Increment(metrics.total_time_us); net_usage_us_produced_block.Increment(metrics.net_usage_us); + block_num.Set(metrics.block_num); + block_total_time_us_produced_block.Increment(metrics.block_total_time_us); + block_idle_time_us_produced_block.Increment(metrics.block_idle_us); + block_num_success_trx_produced_block.Increment(metrics.num_success_trx); + block_success_trx_time_us_produced_block.Increment(metrics.success_trx_time_us); + block_num_failed_trx_produced_block.Increment(metrics.num_fail_trx); + block_fail_trx_time_us_produced_block.Increment(metrics.fail_trx_time_us); + block_num_transient_trx_produced_block.Increment(metrics.num_transient_trx); + block_transient_trx_time_us_produced_block.Increment(metrics.transient_trx_time_us); + block_other_time_us_produced_block.Increment(metrics.block_other_time_us); + last_irreversible.Set(metrics.last_irreversible); head_block_num.Set(metrics.head_block_num); } + void update(const producer_plugin::speculative_block_metrics& metrics) { + block_num.Set(metrics.block_num); + blocks_speculative_num.Increment(1); + block_total_time_us_speculative_block.Increment(metrics.block_total_time_us); + block_idle_time_us_speculative_block.Increment(metrics.block_idle_us); + block_num_success_trx_speculative_block.Increment(metrics.num_success_trx); + block_success_trx_time_us_speculative_block.Increment(metrics.success_trx_time_us); + block_num_failed_trx_speculative_block.Increment(metrics.num_fail_trx); + block_fail_trx_time_us_speculative_block.Increment(metrics.fail_trx_time_us); + block_num_transient_trx_speculative_block.Increment(metrics.num_transient_trx); + block_transient_trx_time_us_speculative_block.Increment(metrics.transient_trx_time_us); + block_other_time_us_speculative_block.Increment(metrics.block_other_time_us); + } + void update(const producer_plugin::incoming_block_metrics& metrics) { trxs_incoming_total.Increment(metrics.trxs_incoming_total); blocks_incoming.Increment(1); @@ -182,6 +249,10 @@ struct catalog_type { [&strand, this](const producer_plugin::produced_block_metrics& metrics) { strand.post([metrics, this]() { update(metrics); }); }); + producer.register_update_speculative_block_metrics( + [&strand, this](const producer_plugin::speculative_block_metrics& metrics) { + strand.post([metrics, this]() { update(metrics); }); + }); producer.register_update_incoming_block_metrics( [&strand, this](const producer_plugin::incoming_block_metrics& metrics) { strand.post([metrics, this]() { update(metrics); });