Skip to content

Commit

Permalink
[#18805] yugabyted: Add ysql connection manager stats to metrics snap…
Browse files Browse the repository at this point in the history
…shotter

Summary:
Added some ysql connection manager stats to the tserver metrics snapshotter.
The metrics are enabled when `ysql_conn_mgr` is added to the `metrics_snapshotter_tserver_metrics_whitelist` gflag.
The recorded metrics are the total number of logical connections and total number of physical connections.

To read the connection manager stats from the tserver, the key of the shared memory segment in which they are stored is needed.
This key is passed to the tserver and stored in a new member variable.
Jira: DB-7685

Test Plan: no test plan

Reviewers: nikhil, hsunder

Reviewed By: hsunder

Subscribers: hsunder, janand, ybase, yugabyted-dev, bogdan, djiang

Differential Revision: https://phorge.dev.yugabyte.com/D29618
  • Loading branch information
djiang9001 committed Nov 2, 2023
1 parent 80a2c74 commit f272ccb
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 13 deletions.
2 changes: 1 addition & 1 deletion bin/yugabyted
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ METRICS_SNAPSHOT_LIST = [
"handler_latency_yb_tserver_TabletServerService_Write_count",
"handler_latency_yb_tserver_TabletServerService_Read_sum",
"handler_latency_yb_tserver_TabletServerService_Write_sum",
"disk_usage", "cpu_usage", "node_up"
"disk_usage", "cpu_usage", "node_up", "ysql_conn_mgr"
]

# YugaWare configs. These have their own separate subdirectory to preserve our itest flow.
Expand Down
104 changes: 93 additions & 11 deletions src/yb/tserver/metrics_snapshotter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@
#include "yb/util/tsan_util.h"
#include "yb/util/varint.h"

#include "yb/yql/ysql_conn_mgr_wrapper/ysql_conn_mgr_stats.h"

using namespace std::literals;

DEFINE_UNKNOWN_int32(metrics_snapshotter_interval_ms, 30 * 1000,
Expand Down Expand Up @@ -112,6 +114,7 @@ DEFINE_UNKNOWN_uint64(metrics_snapshotter_ttl_ms, 7 * 24 * 60 * 60 * 1000 /* 1 w
TAG_FLAG(metrics_snapshotter_ttl_ms, advanced);

DECLARE_int32(max_tables_metrics_breakdowns);
DECLARE_bool(enable_ysql_conn_mgr_stats);

using std::shared_ptr;
using std::vector;
Expand Down Expand Up @@ -143,6 +146,8 @@ class MetricsSnapshotter::Thread {
Status DoPrometheusMetricsSnapshot(const client::TableHandle& table,
shared_ptr<YBSession> session, const std::string& entity_type, const std::string& entity_id,
const std::string& metric_name, int64_t metric_val, const rapidjson::Document* details);
Status DoYsqlConnMgrMetricsSnapshot(const client::TableHandle& table,
shared_ptr<YBSession> session);
Status DoMetricsSnapshot();

void FlushSession(const std::shared_ptr<YBSession>& session,
Expand Down Expand Up @@ -312,6 +317,79 @@ Status MetricsSnapshotter::Thread::DoPrometheusMetricsSnapshot(const client::Tab
return Status::OK();
}

namespace {

// Names of the tserver metrics whitelist entries that are looked up explicitly by name in this
// file.
constexpr const char* kMetricWhitelistItemYsqlConnMgr = "ysql_conn_mgr";
constexpr const char* kMetricWhitelistItemDiskUsage = "disk_usage";
constexpr const char* kMetricWhitelistItemCpuUsage = "cpu_usage";
constexpr const char* kMetricWhitelistItemNodeUp = "node_up";

// Upper bound on the number of pool entries scanned when reading the ysql connection manager
// stats.
constexpr uint32_t kYsqlConnMgrMaxPools = YSQL_CONN_MGR_MAX_POOLS;

}  // namespace

// Records the ysql connection manager totals as two tserver metrics:
// "total_logical_connections" (active + queued + idle_or_pending clients) and
// "total_physical_connections" (active + idle servers).
//
// Per-pool stats are read from the shared memory segment identified by
// server_->GetYsqlConnMgrStatsShmemKey(). Pool entries are scanned in order until the first one
// with an empty pool_name (at most kYsqlConnMgrMaxPools entries); the "control_connection" pool is
// excluded from both totals.
//
// Returns OK without writing any metric (only a rate-limited warning is logged) when
// enable_ysql_conn_mgr_stats is false, the key is zero, or the segment cannot be found or
// attached. Otherwise returns the first non-OK status from writing the metrics, or OK.
Status MetricsSnapshotter::Thread::DoYsqlConnMgrMetricsSnapshot(const client::TableHandle& table,
    shared_ptr<YBSession> session) {
  if (!FLAGS_enable_ysql_conn_mgr_stats) {
    YB_LOG_EVERY_N_SECS(WARNING, 120) << "Metrics whitelist contains ysql_conn_mgr, but "
                                      << "enable_ysql_conn_mgr_stats flag is false.";
    return Status::OK();
  }

  // The shared memory access below is adapted from the GetYsqlConnMgrStats function in
  // pgsql_webserver_wrapper.cc.
  const auto shm_key = server_->GetYsqlConnMgrStatsShmemKey();
  if (shm_key == 0) {
    YB_LOG_EVERY_N_SECS(WARNING, 120) << "Ysql connection manager shmem key is zero.";
    return Status::OK();
  }

  const int shmid = shmget(shm_key, 0, 0666);
  if (shmid == -1) {
    YB_LOG_EVERY_N_SECS(WARNING, 120) << "Unable to find ysql conn mgr stats from the shared "
                                      << "memory segment, with errno: "
                                      << strerror(errno);
    return Status::OK();
  }

  // Attach to the segment to get a pointer to it.
  // shmat() reports failure by returning (void*)-1, never a null pointer, so comparing against
  // null would let a failed attach fall through to the reads below.
  void* const shm_addr = shmat(shmid, nullptr, 0);
  if (shm_addr == reinterpret_cast<void*>(-1)) {
    YB_LOG_EVERY_N_SECS(WARNING, 120) << "Unable to find ysql conn mgr stats from the shared "
                                      << "memory segment, with errno: "
                                      << strerror(errno);
    return Status::OK();
  }

  // Accumulate the totals straight from the attached segment; no per-pool copy is needed.
  const auto* pools = static_cast<const ConnectionStats*>(shm_addr);
  uint64_t total_logical_connections = 0;
  uint64_t total_physical_connections = 0;
  for (uint32_t idx = 0; idx < kYsqlConnMgrMaxPools; ++idx) {
    const auto& pool = pools[idx];
    // An empty pool name marks the end of the populated entries.
    if (strcmp(pool.pool_name, "") == 0) {
      break;
    }
    // The control connection pool is not counted in either total.
    if (strcmp(pool.pool_name, "control_connection") == 0) {
      continue;
    }
    total_logical_connections += pool.active_clients +
                                 pool.queued_clients +
                                 pool.idle_or_pending_clients;
    total_physical_connections += pool.active_servers + pool.idle_servers;
  }

  // Detach from shared memory before doing any writes.
  if (shmdt(shm_addr) == -1) {
    YB_LOG_EVERY_N_SECS(WARNING, 120) << "Unable to detach from the ysql conn mgr stats shared "
                                      << "memory segment, with errno: "
                                      << strerror(errno);
  }

  RETURN_NOT_OK(DoPrometheusMetricsSnapshot(table, session, "tserver",
                                            server_->permanent_uuid(),
                                            "total_logical_connections",
                                            total_logical_connections));
  RETURN_NOT_OK(DoPrometheusMetricsSnapshot(table, session, "tserver",
                                            server_->permanent_uuid(),
                                            "total_physical_connections",
                                            total_physical_connections));
  return Status::OK();
}

Status MetricsSnapshotter::Thread::DoMetricsSnapshot() {
CHECK(IsCurrentThread());

Expand All @@ -334,20 +412,20 @@ Status MetricsSnapshotter::Thread::DoMetricsSnapshot() {
WARN_NOT_OK(
server_->metric_registry()->WriteForPrometheus(&nmswriter, entity_opts, opt),
"Couldn't write metrics for native metrics storage");
for (const auto& kv : server_metrics) {
if (tserver_metrics_whitelist_.find(kv.first) != tserver_metrics_whitelist_.end()) {
for (const auto& [metric_name, metric_value] : server_metrics) {
if (tserver_metrics_whitelist_.contains(metric_name)) {
RETURN_NOT_OK(DoPrometheusMetricsSnapshot(table, session, "tserver",
server_->permanent_uuid(), kv.first, kv.second));
server_->permanent_uuid(), metric_name, metric_value));
}
}

if (tserver_metrics_whitelist_.find("node_up") != tserver_metrics_whitelist_.end()) {
if (tserver_metrics_whitelist_.contains(kMetricWhitelistItemNodeUp)) {
RETURN_NOT_OK(DoPrometheusMetricsSnapshot(table, session, "tserver",
server_->permanent_uuid(), "node_up",
1));
}

if (tserver_metrics_whitelist_.find("disk_usage") != tserver_metrics_whitelist_.end()) {
if (tserver_metrics_whitelist_.contains(kMetricWhitelistItemDiskUsage)) {
struct statvfs stat;
set<uint64_t> fs_ids;
std::vector<std::string> all_data_paths = opts_.fs_opts.data_paths;
Expand All @@ -370,7 +448,7 @@ Status MetricsSnapshotter::Thread::DoMetricsSnapshot() {
}
}

if (tserver_metrics_whitelist_.find("cpu_usage") != tserver_metrics_whitelist_.end()) {
if (tserver_metrics_whitelist_.contains(kMetricWhitelistItemCpuUsage)) {
// Store the {total_ticks, user_ticks, and system_ticks}
auto cur_ticks = CHECK_RESULT(GetCpuUsage());
bool get_cpu_success = std::all_of(
Expand Down Expand Up @@ -416,11 +494,15 @@ Status MetricsSnapshotter::Thread::DoMetricsSnapshot() {
}
}

for (const auto& kv : table_metrics) {
for (const auto& vkv : kv.second) {
if (table_metrics_whitelist_.find(vkv.first) != table_metrics_whitelist_.end()) {
RETURN_NOT_OK(DoPrometheusMetricsSnapshot(table, session, "table", kv.first, vkv.first,
vkv.second));
if (tserver_metrics_whitelist_.contains(kMetricWhitelistItemYsqlConnMgr)) {
RETURN_NOT_OK(DoYsqlConnMgrMetricsSnapshot(table, session));
}

for (const auto& [table_id, table_metrics] : table_metrics) {
for (const auto& [metric_name, metric_value] : table_metrics) {
if (table_metrics_whitelist_.contains(metric_name)) {
RETURN_NOT_OK(DoPrometheusMetricsSnapshot(table, session, "table", table_id, metric_name,
metric_value));
}
}
}
Expand Down
6 changes: 6 additions & 0 deletions src/yb/tserver/tablet_server.h
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,9 @@ class TabletServer : public DbServerBase, public TabletServerIf {

// Returns a copy of the cdc_service_ shared pointer.
std::shared_ptr<cdc::CDCServiceImpl> GetCDCService() const {
  return cdc_service_;
}

key_t GetYsqlConnMgrStatsShmemKey() { return ysql_conn_mgr_stats_shmem_key_; }
// Stores the key of the shared memory segment holding the ysql connection manager stats.
void SetYsqlConnMgrStatsShmemKey(key_t shmem_key) {
  ysql_conn_mgr_stats_shmem_key_ = shmem_key;
}

protected:
virtual Status RegisterServices();

Expand Down Expand Up @@ -391,6 +394,9 @@ class TabletServer : public DbServerBase, public TabletServerIf {
// is shut down.
std::weak_ptr<PgClientServiceImpl> pg_client_service_;

// Key of the shared memory segment holding the ysql connection manager stats; 0 until a key is set.
key_t ysql_conn_mgr_stats_shmem_key_ = 0;

private:
// Auto initialize some of the service flags that are defaulted to -1.
void AutoInitServiceFlags();
Expand Down
8 changes: 7 additions & 1 deletion src/yb/tserver/tablet_server_main_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
#include "yb/server/skewed_clock.h"
#include "yb/server/secure.h"
#include "yb/tserver/factory.h"
#include "yb/tserver/metrics_snapshotter.h"
#include "yb/tserver/tablet_server.h"

#include "yb/util/flags.h"
Expand Down Expand Up @@ -311,11 +312,16 @@ int TabletServerMain(int argc, char** argv) {
LOG_AND_RETURN_FROM_MAIN_NOT_OK(SetSslConf(server, &ysql_conn_mgr_conf));

// Construct the config file for the Ysql Connection Manager process.
const auto conn_mgr_shmem_key =
FLAGS_enable_ysql_conn_mgr_stats ? pg_supervisor->GetYsqlConnManagerStatsShmkey() : 0;
ysql_conn_mgr_supervisor = std::make_unique<ysql_conn_mgr_wrapper::YsqlConnMgrSupervisor>(
ysql_conn_mgr_conf,
FLAGS_enable_ysql_conn_mgr_stats ? pg_supervisor->GetYsqlConnManagerStatsShmkey() : 0);
conn_mgr_shmem_key);

LOG_AND_RETURN_FROM_MAIN_NOT_OK(ysql_conn_mgr_supervisor->Start());

// Set the shared memory key for tserver so it can access stats as well.
server->SetYsqlConnMgrStatsShmemKey(conn_mgr_shmem_key);
}

std::unique_ptr<RedisServer> redis_server;
Expand Down

0 comments on commit f272ccb

Please sign in to comment.