Skip to content

Commit

Permalink
Merge pull request #23468 from pgellert/vbotbuildovich/backport-23367…
Browse files Browse the repository at this point in the history
…-v23.3.x-95

[v23.3.x] CORE-7338 license: add  metric
  • Loading branch information
pgellert authored Sep 25, 2024
2 parents 2fc6a8c + 9a6df45 commit def452f
Show file tree
Hide file tree
Showing 10 changed files with 195 additions and 5 deletions.
2 changes: 2 additions & 0 deletions src/v/features/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ v_cc_library(
v::model
v::config
v::version
v::security
v::metrics
)

add_dependencies(v_features kafka_codegen_headers)
Expand Down
78 changes: 78 additions & 0 deletions src/v/features/feature_table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,18 @@
#include "cluster/types.h"
#include "config/node_config.h"
#include "features/logger.h"
#include "metrics/metrics.h"
#include "prometheus/prometheus_sanitize.h"
#include "version.h"

#include <seastar/core/abort_source.hh>

#include <chrono>
#include <memory>

// The feature table is closely related to cluster and uses many types from it
using namespace cluster;
using namespace std::chrono_literals;

namespace features {

Expand Down Expand Up @@ -189,6 +195,60 @@ static std::array test_extra_schema{
feature_spec::prepare_policy::always},
};

class feature_table::probe {
public:
explicit probe(const feature_table& parent)
: _parent(parent) {}

probe(const probe&) = delete;
probe& operator=(const probe&) = delete;
probe(probe&&) = delete;
probe& operator=(probe&&) = delete;
~probe() noexcept = default;

void setup_metrics() {
if (ss::this_shard_id() != 0) {
return;
}

if (!config::shard_local_cfg().disable_metrics()) {
setup_metrics_for(_metrics);
}

if (!config::shard_local_cfg().disable_public_metrics()) {
setup_metrics_for(_public_metrics);
}
}

void setup_metrics_for(metrics::metric_groups_base& metrics) {
namespace sm = ss::metrics;

static_assert(
!std::is_move_constructible_v<feature_table>
&& !std::is_move_assignable_v<feature_table>
&& !std::is_copy_constructible_v<feature_table>
&& !std::is_copy_assignable_v<feature_table>,
"The probe captures a reference to this");

metrics.add_group(
prometheus_sanitize::metrics_name("cluster:features"),
{
sm::make_gauge(
"enterprise_license_expiry_sec",
[&ft = _parent]() {
return calculate_expiry_metric(ft.get_license());
},
sm::description("Number of seconds remaining until the "
"Enterprise license expires"))
.aggregate({sm::shard_label}),
});
}

const feature_table& _parent;
metrics::internal_metric_groups _metrics;
metrics::public_metric_groups _public_metrics;
};

feature_table::feature_table() {
// Intentionally undocumented environment variable, only for use
// in integration tests.
Expand Down Expand Up @@ -221,9 +281,15 @@ feature_table::feature_table() {
}
}
}

_probe = std::make_unique<probe>(*this);
_probe->setup_metrics();
}

feature_table::~feature_table() noexcept = default;

ss::future<> feature_table::stop() {
_probe.reset();
_as.request_abort();

// Don't trust callers to have fired their abort source in the right
Expand Down Expand Up @@ -686,6 +752,18 @@ void feature_table::assert_compatible_version(bool override) {
}
}

long long feature_table::calculate_expiry_metric(
const std::optional<security::license>& license,
security::license::clock::time_point now) {
if (!license) {
return -1;
}

auto rem = license->expiration() - now;
auto rem_capped = std::max(rem.zero(), rem);
return rem_capped / 1s;
}

} // namespace features

namespace cluster {
Expand Down
15 changes: 15 additions & 0 deletions src/v/features/feature_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "utils/waiter_queue.h"

#include <array>
#include <memory>
#include <string_view>
#include <unordered_set>

Expand Down Expand Up @@ -444,6 +445,11 @@ class feature_table {
static cluster::cluster_version get_earliest_logical_version();

feature_table();
feature_table(const feature_table&) = delete;
feature_table& operator=(const feature_table&) = delete;
feature_table(feature_table&&) = delete;
feature_table& operator=(feature_table&&) = delete;
~feature_table() noexcept;

feature_state& get_state(feature f_id);
const feature_state& get_state(feature f_id) const {
Expand Down Expand Up @@ -545,7 +551,15 @@ class feature_table {
// Assert out on startup if we appear to have upgraded too far
void assert_compatible_version(bool);

// Visible for testing
static long long calculate_expiry_metric(
const std::optional<security::license>& license,
security::license::clock::time_point now
= security::license::clock::now());

private:
class probe;

// Only for use by our friends feature backend & manager
void set_active_version(
cluster::cluster_version,
Expand Down Expand Up @@ -601,6 +615,7 @@ class feature_table {

ss::gate _gate;
ss::abort_source _as;
std::unique_ptr<probe> _probe;
};

} // namespace features
Expand Down
23 changes: 23 additions & 0 deletions src/v/features/tests/feature_table_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include "features/feature_table.h"
#include "features/feature_table_snapshot.h"
#include "security/license.h"
#include "test_utils/fixture.h"
#include "vlog.h"

Expand Down Expand Up @@ -334,3 +335,25 @@ FIXTURE_TEST(feature_table_old_snapshot, feature_table_fixture) {
ft.get_state(feature::test_alpha).get_state()
== feature_state::state::active);
}

SEASTAR_THREAD_TEST_CASE(feature_table_probe_expiry_metric_test) {
using ft = features::feature_table;
const char* sample_valid_license = std::getenv("REDPANDA_SAMPLE_LICENSE");
if (sample_valid_license == nullptr) {
const char* is_on_ci = std::getenv("CI");
BOOST_TEST_REQUIRE(
!is_on_ci,
"Expecting the REDPANDA_SAMPLE_LICENSE env var in the CI "
"enviornment");
return;
}
const ss::sstring license_str{sample_valid_license};
const auto license = security::make_license(license_str);

auto expiry = security::license::clock::time_point{4813252273s};

BOOST_CHECK_EQUAL(ft::calculate_expiry_metric(license, expiry - 1s), 1);
BOOST_CHECK_EQUAL(ft::calculate_expiry_metric(license, expiry), 0);
BOOST_CHECK_EQUAL(ft::calculate_expiry_metric(license, expiry + 1s), 0);
BOOST_CHECK_EQUAL(ft::calculate_expiry_metric(std::nullopt), -1);
}
13 changes: 10 additions & 3 deletions src/v/security/license.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
#include <cryptopp/rsa.h>
#include <cryptopp/sha.h>

#include <algorithm>
#include <chrono>

using namespace std::chrono_literals;

namespace security {

namespace crypto {
Expand Down Expand Up @@ -223,9 +228,11 @@ license make_license(const ss::sstring& raw_license) {
}

bool license::is_expired() const noexcept {
const auto now = std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now().time_since_epoch());
return now > expiry;
return clock::now() > expiration();
}

license::clock::time_point license::expiration() const noexcept {
return clock::time_point{expiry};
}

} // namespace security
Expand Down
6 changes: 6 additions & 0 deletions src/v/security/license.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <boost/date_time/gregorian/gregorian_types.hpp>
#include <fmt/core.h>

#include <chrono>
#include <exception>
#include <fstream>
#include <vector>
Expand Down Expand Up @@ -64,6 +65,8 @@ inline std::ostream& operator<<(std::ostream& os, license_type lt) {

struct license
: serde::envelope<license, serde::version<1>, serde::compat_version<0>> {
using clock = std::chrono::system_clock;

/// Expected encoded contents
uint8_t format_version;
license_type type;
Expand All @@ -81,6 +84,9 @@ struct license
/// Seconds since epoch until license expiration
std::chrono::seconds expires() const noexcept;

/// Expiration timepoint
clock::time_point expiration() const noexcept;

auto operator<=>(const license&) const = delete;

private:
Expand Down
8 changes: 8 additions & 0 deletions src/v/security/tests/license_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/test/unit_test.hpp>

#include <chrono>

using namespace std::chrono_literals;

namespace security {

BOOST_AUTO_TEST_CASE(test_license_invalid_signature) {
Expand Down Expand Up @@ -76,7 +81,10 @@ BOOST_AUTO_TEST_CASE(test_license_valid_content) {
BOOST_CHECK_EQUAL(license.format_version, 0);
BOOST_CHECK_EQUAL(license.type, license_type::enterprise);
BOOST_CHECK_EQUAL(license.organization, "redpanda-testing");
BOOST_CHECK(!license.is_expired());
BOOST_CHECK_EQUAL(license.expiry.count(), 4813252273);
BOOST_CHECK(
license.expiration() == license::clock::time_point{4813252273s});
BOOST_CHECK_EQUAL(
license.checksum,
"2730125070a934ca1067ed073d7159acc9975dc61015892308aae186f7455daf");
Expand Down
12 changes: 12 additions & 0 deletions src/v/storage/tests/segment_deduplication_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0

#include "config/configuration.h"
#include "gmock/gmock.h"
#include "random/generators.h"
#include "storage/chunk_cache.h"
Expand Down Expand Up @@ -158,6 +159,17 @@ TEST(FindSlidingRangeTest, TestCollectOneRecordSegments) {
}

TEST(BuildOffsetMap, TestBuildSimpleMap) {
ss::smp::invoke_on_all([] {
config::shard_local_cfg().disable_metrics.set_value(true);
config::shard_local_cfg().disable_public_metrics.set_value(true);
}).get();
auto defer_config_reset = ss::defer([] {
ss::smp::invoke_on_all([] {
config::shard_local_cfg().disable_metrics.reset();
config::shard_local_cfg().disable_public_metrics.reset();
}).get();
});

storage::disk_log_builder b;
build_segments(b, 3);
auto cleanup = ss::defer([&] { b.stop().get(); });
Expand Down
8 changes: 7 additions & 1 deletion src/v/storage/tests/storage_test_fixture.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,14 @@ class storage_test_fixture {
resources,
feature_table) {
configure_unit_test_logging();
// avoid double metric registrations
// avoid double metric registrations - disk_log_builder and other
// helpers also start a feature_table and other structs that register
// metrics
ss::smp::invoke_on_all([] {
config::shard_local_cfg().get("disable_metrics").set_value(true);
config::shard_local_cfg()
.get("disable_public_metrics")
.set_value(true);
config::shard_local_cfg()
.get("log_segment_size_min")
.set_value(std::optional<uint64_t>{});
Expand All @@ -223,6 +228,7 @@ class storage_test_fixture {
feature_table.stop().get();
ss::smp::invoke_on_all([] {
config::shard_local_cfg().get("disable_metrics").reset();
config::shard_local_cfg().get("disable_public_metrics").reset();
config::shard_local_cfg().get("log_segment_size_min").reset();
}).get();
}
Expand Down
35 changes: 34 additions & 1 deletion tests/rptest/tests/rpk_cluster_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import json

from rptest.services.cluster import cluster
from rptest.services.redpanda import RESTART_LOG_ALLOW_LIST
from rptest.services.redpanda import RESTART_LOG_ALLOW_LIST, MetricSamples, MetricsEndpoint
from rptest.util import expect_exception, get_cluster_license, get_second_cluster_license
from ducktape.utils.util import wait_until
from rptest.util import wait_until_result
Expand Down Expand Up @@ -208,6 +208,25 @@ def test_cluster_down(self):
else:
assert False, f"Unexpected success: '{r}'"

def _get_license_expiry(self) -> int:
METRICS_NAME = "cluster_features_enterprise_license_expiry_sec"

def get_metrics_value(metrics_endpoint: MetricsEndpoint) -> int:
metrics = self.redpanda.metrics_sample(
sample_pattern=METRICS_NAME, metrics_endpoint=metrics_endpoint)
assert isinstance(metrics, MetricSamples), \
f'Failed to get metrics for {METRICS_NAME}'

samples = [sample for sample in metrics.samples]
assert len(samples) == len(self.redpanda.nodes), \
f'Invalid number of samples: {len(samples)}'
return int(samples[0].value)

internal_val = get_metrics_value(MetricsEndpoint.METRICS)
public_val = get_metrics_value(MetricsEndpoint.PUBLIC_METRICS)
assert internal_val == public_val, f"Mismatch: {internal_val} != {public_val}"
return internal_val

@cluster(num_nodes=3)
def test_upload_and_query_cluster_license_rpk(self):
"""
Expand All @@ -220,6 +239,12 @@ def test_upload_and_query_cluster_license_rpk(self):
"Skipping test, REDPANDA_SAMPLE_LICENSE env var not found")
return

wait_until(lambda: self._get_license_expiry() == -1,
timeout_sec=10,
backoff_sec=1,
retry_on_exc=True,
err_msg="Unset license should return a -1 expiry")

with tempfile.NamedTemporaryFile() as tf:
tf.write(bytes(license, 'UTF-8'))
tf.seek(0)
Expand All @@ -237,6 +262,14 @@ def obtain_license():
retry_on_exc=True,
err_msg="unable to retrieve license information")

wait_until(
lambda: self._get_license_expiry() > 0,
timeout_sec=10,
backoff_sec=1,
retry_on_exc=True,
err_msg="The expiry metric should be positive with a valid license"
)

expected_license = {
'expires':
"Jul 11 2122",
Expand Down

0 comments on commit def452f

Please sign in to comment.