Skip to content

Commit

Permalink
feat(new_metrics): collect the number of primary and secondary replicas
Browse files Browse the repository at this point in the history
  • Loading branch information
empiredan committed Dec 2, 2024
1 parent d9f2600 commit 831d358
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 6 deletions.
34 changes: 28 additions & 6 deletions src/replica/replica_stub.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,16 @@ METRIC_DEFINE_gauge_int64(server,
dsn::metric_unit::kReplicas,
"The number of closing replicas");

// Defines the int64 gauge `primary_replicas` on the `server` metric entity,
// with unit dsn::metric_unit::kReplicas.
// replica_stub::on_replicas_stat() sets it to the number of replicas whose
// status is partition_status::PS_PRIMARY.
METRIC_DEFINE_gauge_int64(server,
primary_replicas,
dsn::metric_unit::kReplicas,
"The number of primary replicas");

// Defines the int64 gauge `secondary_replicas` on the `server` metric entity,
// with unit dsn::metric_unit::kReplicas.
// replica_stub::on_replicas_stat() sets it to the number of replicas whose
// status is partition_status::PS_SECONDARY.
METRIC_DEFINE_gauge_int64(server,
secondary_replicas,
dsn::metric_unit::kReplicas,
"The number of secondary replicas");

METRIC_DEFINE_gauge_int64(server,
learning_replicas,
dsn::metric_unit::kReplicas,
Expand Down Expand Up @@ -368,6 +378,8 @@ replica_stub::replica_stub(replica_state_subscriber subscriber /*= nullptr*/,
METRIC_VAR_INIT_server(total_replicas),
METRIC_VAR_INIT_server(opening_replicas),
METRIC_VAR_INIT_server(closing_replicas),
METRIC_VAR_INIT_server(primary_replicas),
METRIC_VAR_INIT_server(secondary_replicas),
METRIC_VAR_INIT_server(learning_replicas),
METRIC_VAR_INIT_server(learning_replicas_max_duration_ms),
METRIC_VAR_INIT_server(learning_replicas_max_copy_file_bytes),
Expand Down Expand Up @@ -1625,26 +1637,30 @@ void replica_stub::on_replicas_stat()
LOG_INFO("start replicas statistics, replica_count = {}", rep_stat_info_by_gpid.size());

// statistic learning info
uint64_t learning_count = 0;
uint64_t learning_max_duration_time_ms = 0;
uint64_t learning_max_copy_file_size = 0;
uint64_t bulk_load_running_count = 0;
uint64_t bulk_load_max_ingestion_time_ms = 0;
uint64_t bulk_load_max_duration_time_ms = 0;
uint64_t splitting_count = 0;
uint64_t splitting_max_duration_time_ms = 0;
uint64_t splitting_max_async_learn_time_ms = 0;
uint64_t splitting_max_copy_file_size = 0;

std::map<partition_status::type, size_t> status_counts;
for (const auto &[_, rep_stat_info] : rep_stat_info_by_gpid) {
const auto &rep = rep_stat_info.rep;
++status_counts[rep->status()];

if (rep->status() == partition_status::PS_POTENTIAL_SECONDARY) {
learning_count++;
learning_max_duration_time_ms = std::max(
learning_max_duration_time_ms, rep->_potential_secondary_states.duration_ms());
learning_max_copy_file_size =
std::max(learning_max_copy_file_size,
rep->_potential_secondary_states.learning_copy_file_size);

continue;
}

if (rep->status() == partition_status::PS_PRIMARY ||
rep->status() == partition_status::PS_SECONDARY) {
if (rep->get_bulk_loader()->get_bulk_load_status() != bulk_load_status::BLS_INVALID) {
Expand All @@ -1654,26 +1670,32 @@ void replica_stub::on_replicas_stat()
bulk_load_max_duration_time_ms =
std::max(bulk_load_max_duration_time_ms, rep->get_bulk_loader()->duration_ms());
}

continue;
}

// splitting_max_copy_file_size, rep->_split_states.copy_file_size
if (rep->status() == partition_status::PS_PARTITION_SPLIT) {
splitting_count++;
splitting_max_duration_time_ms =
std::max(splitting_max_duration_time_ms, rep->_split_states.total_ms());
splitting_max_async_learn_time_ms =
std::max(splitting_max_async_learn_time_ms, rep->_split_states.async_learn_ms());
splitting_max_copy_file_size =
std::max(splitting_max_copy_file_size, rep->_split_states.splitting_copy_file_size);

continue;
}
}

METRIC_VAR_SET(learning_replicas, learning_count);
METRIC_VAR_SET(primary_replicas, status_counts[partition_status::PS_PRIMARY]);
METRIC_VAR_SET(secondary_replicas, status_counts[partition_status::PS_SECONDARY]);
METRIC_VAR_SET(learning_replicas, status_counts[partition_status::PS_POTENTIAL_SECONDARY]);
METRIC_VAR_SET(learning_replicas_max_duration_ms, learning_max_duration_time_ms);
METRIC_VAR_SET(learning_replicas_max_copy_file_bytes, learning_max_copy_file_size);
METRIC_VAR_SET(bulk_load_running_count, bulk_load_running_count);
METRIC_VAR_SET(bulk_load_ingestion_max_duration_ms, bulk_load_max_ingestion_time_ms);
METRIC_VAR_SET(bulk_load_max_duration_ms, bulk_load_max_duration_time_ms);
METRIC_VAR_SET(splitting_replicas, splitting_count);
METRIC_VAR_SET(splitting_replicas, status_counts[partition_status::PS_PARTITION_SPLIT]);
METRIC_VAR_SET(splitting_replicas_max_duration_ms, splitting_max_duration_time_ms);
METRIC_VAR_SET(splitting_replicas_async_learn_max_duration_ms,
splitting_max_async_learn_time_ms);
Expand Down
2 changes: 2 additions & 0 deletions src/replica/replica_stub.h
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,8 @@ class replica_stub : public serverlet<replica_stub>, public ref_counter
METRIC_VAR_DECLARE_gauge_int64(opening_replicas);
METRIC_VAR_DECLARE_gauge_int64(closing_replicas);

METRIC_VAR_DECLARE_gauge_int64(primary_replicas);
METRIC_VAR_DECLARE_gauge_int64(secondary_replicas);
METRIC_VAR_DECLARE_gauge_int64(learning_replicas);
METRIC_VAR_DECLARE_gauge_int64(learning_replicas_max_duration_ms);
METRIC_VAR_DECLARE_gauge_int64(learning_replicas_max_copy_file_bytes);
Expand Down

0 comments on commit 831d358

Please sign in to comment.