From 78de1c702ef22c1becb876913674b9c5fa7de31b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gell=C3=A9rt=20Peresztegi-Nagy?= Date: Wed, 18 Sep 2024 17:06:03 +0100 Subject: [PATCH 1/5] security: add license::expiration Expose the expiration as a time_point. --- src/v/security/license.cc | 13 ++++++++++--- src/v/security/license.h | 6 ++++++ src/v/security/tests/license_test.cc | 8 ++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/v/security/license.cc b/src/v/security/license.cc index eb6ae618cfa3e..64ec0f7138414 100644 --- a/src/v/security/license.cc +++ b/src/v/security/license.cc @@ -18,6 +18,11 @@ #include "json/validator.h" #include "utils/base64.h" +#include +#include + +using namespace std::chrono_literals; + namespace security { namespace crypto { @@ -178,9 +183,11 @@ license make_license(const ss::sstring& raw_license) { } bool license::is_expired() const noexcept { - const auto now = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()); - return now > expiry; + return clock::now() > expiration(); +} + +license::clock::time_point license::expiration() const noexcept { + return clock::time_point{expiry}; } } // namespace security diff --git a/src/v/security/license.h b/src/v/security/license.h index 474a6b84c503c..43975b3cb5d38 100644 --- a/src/v/security/license.h +++ b/src/v/security/license.h @@ -21,6 +21,7 @@ #include +#include #include #include #include @@ -67,6 +68,8 @@ inline std::ostream& operator<<(std::ostream& os, license_type lt) { struct license : serde::envelope, serde::compat_version<0>> { + using clock = std::chrono::system_clock; + /// Expected encoded contents uint8_t format_version; license_type type; @@ -84,6 +87,9 @@ struct license /// Seconds since epoch until license expiration std::chrono::seconds expires() const noexcept; + /// Expiration timepoint + clock::time_point expiration() const noexcept; + auto operator<=>(const license&) const = delete; private: diff --git a/src/v/security/tests/license_test.cc b/src/v/security/tests/license_test.cc index 2e726ba7dd8af..2a9dc64511cc3 100644 --- a/src/v/security/tests/license_test.cc +++ b/src/v/security/tests/license_test.cc @@ -13,6 +13,11 @@ #include #include #include + +#include + +using namespace std::chrono_literals; + namespace security { BOOST_AUTO_TEST_CASE(test_license_invalid_signature) { @@ -76,7 +81,10 @@ BOOST_AUTO_TEST_CASE(test_license_valid_content) { BOOST_CHECK_EQUAL(license.format_version, 0); BOOST_CHECK_EQUAL(license.type, license_type::enterprise); BOOST_CHECK_EQUAL(license.organization, "redpanda-testing"); + BOOST_CHECK(!license.is_expired()); BOOST_CHECK_EQUAL(license.expiry.count(), 4813252273); + BOOST_CHECK( + license.expiration() == license::clock::time_point{4813252273s}); BOOST_CHECK_EQUAL( license.checksum, "2730125070a934ca1067ed073d7159acc9975dc61015892308aae186f7455daf"); From 1c8f66b467b0b5e84de6e6c424fbe65e825f5e98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gell=C3=A9rt=20Peresztegi-Nagy?= Date: Fri, 20 Sep 2024 17:31:05 +0100 Subject: [PATCH 2/5] storage: disable metrics in disk_log_builder tests These two fixture tests create multiple `feature_table`s at the same time. This commit disables emitting metrics on these tests to prevent double metric registration errors in the follow up commit of this PR. --- src/v/storage/tests/BUILD | 1 + src/v/storage/tests/segment_deduplication_test.cc | 12 ++++++++++++ src/v/storage/tests/storage_test_fixture.h | 8 +++++++- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/v/storage/tests/BUILD b/src/v/storage/tests/BUILD index f1a49f2884394..0d45547fec77b 100644 --- a/src/v/storage/tests/BUILD +++ b/src/v/storage/tests/BUILD @@ -291,6 +291,7 @@ redpanda_cc_gtest( deps = [ ":disk_log_builder", ":disk_log_builder_fixture", + "//src/v/config", "//src/v/model", "//src/v/model/tests:random", "//src/v/random:generators", diff --git a/src/v/storage/tests/segment_deduplication_test.cc b/src/v/storage/tests/segment_deduplication_test.cc index 44c03cf97047c..96a8dd44e57ec 100644 --- a/src/v/storage/tests/segment_deduplication_test.cc +++ b/src/v/storage/tests/segment_deduplication_test.cc @@ -7,6 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0 +#include "config/configuration.h" #include "gmock/gmock.h" #include "model/record_batch_types.h" #include "model/tests/random_batch.h" @@ -220,6 +221,17 @@ TEST(FindSlidingRangeTest, TestEmptySegmentNoCompactibleRecords) { } TEST(BuildOffsetMap, TestBuildSimpleMap) { + ss::smp::invoke_on_all([] { + config::shard_local_cfg().disable_metrics.set_value(true); + config::shard_local_cfg().disable_public_metrics.set_value(true); + }).get(); + auto defer_config_reset = ss::defer([] { + ss::smp::invoke_on_all([] { + config::shard_local_cfg().disable_metrics.reset(); + config::shard_local_cfg().disable_public_metrics.reset(); + }).get(); + }); + storage::disk_log_builder b; build_segments(b, 3); auto cleanup = ss::defer([&] { b.stop().get(); }); diff --git a/src/v/storage/tests/storage_test_fixture.h b/src/v/storage/tests/storage_test_fixture.h index 20dc71fd272d3..3605538c117b9 100644 --- a/src/v/storage/tests/storage_test_fixture.h +++ b/src/v/storage/tests/storage_test_fixture.h @@ -209,9 +209,14 @@ class storage_test_fixture { resources, feature_table) { configure_unit_test_logging(); - // avoid double metric registrations + // avoid double metric registrations - disk_log_builder and other + // helpers also start a feature_table and other structs that register + // metrics ss::smp::invoke_on_all([] { config::shard_local_cfg().get("disable_metrics").set_value(true); + config::shard_local_cfg() + .get("disable_public_metrics") + .set_value(true); config::shard_local_cfg() .get("log_segment_size_min") .set_value(std::optional{}); @@ -230,6 +235,7 @@ class storage_test_fixture { feature_table.stop().get(); ss::smp::invoke_on_all([] { config::shard_local_cfg().get("disable_metrics").reset(); + config::shard_local_cfg().get("disable_public_metrics").reset(); config::shard_local_cfg().get("log_segment_size_min").reset(); }).get(); } From edb5f48493aaec25ebcb10f35eb26b3ad5ae1a10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gell=C3=A9rt=20Peresztegi-Nagy?= Date: Wed, 18 Sep 2024 17:06:46 +0100 Subject: [PATCH 3/5] features: new metric enterprise_license_expiry_sec Adds a metric to allow easier monitoring of when the enterprise license is going to expiry. The dependency on the `security` module existed even previously in the bazel build but it was missing from the cmake build. --- src/v/features/BUILD | 3 + src/v/features/CMakeLists.txt | 2 + src/v/features/feature_table.cc | 78 ++++++++++++++++++++++ src/v/features/feature_table.h | 15 +++++ src/v/features/tests/feature_table_test.cc | 23 +++++++ 5 files changed, 121 insertions(+) diff --git a/src/v/features/BUILD b/src/v/features/BUILD index 34c6cdf90471b..f6d2cfb856b8d 100644 --- a/src/v/features/BUILD +++ b/src/v/features/BUILD @@ -14,6 +14,9 @@ redpanda_cc_library( "fwd.h", "logger.h", ], + implementation_deps = [ + "//src/v/metrics", + ], include_prefix = "features", visibility = ["//visibility:public"], deps = [ diff --git a/src/v/features/CMakeLists.txt b/src/v/features/CMakeLists.txt index 3c0253f0b89d4..3e128194c8088 100644 --- a/src/v/features/CMakeLists.txt +++ b/src/v/features/CMakeLists.txt @@ -10,6 +10,8 @@ v_cc_library( v::model v::config v::version + v::security + v::metrics ) add_dependencies(v_features kafka_codegen_headers) diff --git a/src/v/features/feature_table.cc b/src/v/features/feature_table.cc index 18b1c158cb00d..200cb0c694f21 100644 --- a/src/v/features/feature_table.cc +++ b/src/v/features/feature_table.cc @@ -15,12 +15,18 @@ #include "cluster/version.h" #include "config/configuration.h" #include "features/logger.h" +#include "metrics/metrics.h" +#include "metrics/prometheus_sanitize.h" #include "version/version.h" #include +#include +#include + // The feature table is closely related to cluster and uses many types from it using namespace cluster; +using namespace std::chrono_literals; namespace features { @@ -200,6 +206,60 @@ static std::array test_extra_schema{ feature_spec::prepare_policy::always}, }; +class feature_table::probe { +public: + explicit probe(const feature_table& parent) + : _parent(parent) {} + + probe(const probe&) = delete; + probe& operator=(const probe&) = delete; + probe(probe&&) = delete; + probe& operator=(probe&&) = delete; + ~probe() noexcept = default; + + void setup_metrics() { + if (ss::this_shard_id() != 0) { + return; + } + + if (!config::shard_local_cfg().disable_metrics()) { + setup_metrics_for(_metrics); + } + + if (!config::shard_local_cfg().disable_public_metrics()) { + setup_metrics_for(_public_metrics); + } + } + + void setup_metrics_for(metrics::metric_groups_base& metrics) { + namespace sm = ss::metrics; + + static_assert( + !std::is_move_constructible_v + && !std::is_move_assignable_v + && !std::is_copy_constructible_v + && !std::is_copy_assignable_v, + "The probe captures a reference to this"); + + metrics.add_group( + prometheus_sanitize::metrics_name("cluster:features"), + { + sm::make_gauge( + "enterprise_license_expiry_sec", + [&ft = _parent]() { + return calculate_expiry_metric(ft.get_license()); + }, + sm::description("Number of seconds remaining until the " + "Enterprise license expires")) + .aggregate({sm::shard_label}), + }); + } + + const feature_table& _parent; + metrics::internal_metric_groups _metrics; + metrics::public_metric_groups _public_metrics; +}; + feature_table::feature_table() { // Intentionally undocumented environment variable, only for use // in integration tests. @@ -232,9 +292,15 @@ feature_table::feature_table() { } } } + + _probe = std::make_unique(*this); + _probe->setup_metrics(); } +feature_table::~feature_table() noexcept = default; + ss::future<> feature_table::stop() { + _probe.reset(); _as.request_abort(); // Don't trust callers to have fired their abort source in the right @@ -697,6 +763,18 @@ void feature_table::assert_compatible_version(bool override) { } } +long long feature_table::calculate_expiry_metric( + const std::optional& license, + security::license::clock::time_point now) { + if (!license) { + return -1; + } + + auto rem = license->expiration() - now; + auto rem_capped = std::max(rem.zero(), rem); + return rem_capped / 1s; +} + } // namespace features namespace cluster { diff --git a/src/v/features/feature_table.h b/src/v/features/feature_table.h index 95b08f505f80f..9f57a6c6fae68 100644 --- a/src/v/features/feature_table.h +++ b/src/v/features/feature_table.h @@ -17,6 +17,7 @@ #include "utils/waiter_queue.h" #include +#include #include #include @@ -497,6 +498,11 @@ class feature_table { static cluster::cluster_version get_earliest_logical_version(); feature_table(); + feature_table(const feature_table&) = delete; + feature_table& operator=(const feature_table&) = delete; + feature_table(feature_table&&) = delete; + feature_table& operator=(feature_table&&) = delete; + ~feature_table() noexcept; feature_state& get_state(feature f_id); const feature_state& get_state(feature f_id) const { @@ -599,7 +605,15 @@ class feature_table { // Assert out on startup if we appear to have upgraded too far void assert_compatible_version(bool); + // Visible for testing + static long long calculate_expiry_metric( + const std::optional& license, + security::license::clock::time_point now + = security::license::clock::now()); + private: + class probe; + // Only for use by our friends feature backend & manager void set_active_version( cluster::cluster_version, @@ -655,6 +669,7 @@ class feature_table { ss::gate _gate; ss::abort_source _as; + std::unique_ptr _probe; }; } // namespace features diff --git a/src/v/features/tests/feature_table_test.cc b/src/v/features/tests/feature_table_test.cc index 1ca4f58c844b8..5417dca3e18c7 100644 --- a/src/v/features/tests/feature_table_test.cc +++ b/src/v/features/tests/feature_table_test.cc @@ -13,6 +13,7 @@ #include "cluster/feature_update_action.h" #include "features/feature_table.h" #include "features/feature_table_snapshot.h" +#include "security/license.h" #include "test_utils/fixture.h" #include @@ -335,3 +336,25 @@ FIXTURE_TEST(feature_table_old_snapshot, feature_table_fixture) { ft.get_state(feature::test_alpha).get_state() == feature_state::state::active); } + +SEASTAR_THREAD_TEST_CASE(feature_table_probe_expiry_metric_test) { + using ft = features::feature_table; + const char* sample_valid_license = std::getenv("REDPANDA_SAMPLE_LICENSE"); + if (sample_valid_license == nullptr) { + const char* is_on_ci = std::getenv("CI"); + BOOST_TEST_REQUIRE( + !is_on_ci, + "Expecting the REDPANDA_SAMPLE_LICENSE env var in the CI " + "enviornment"); + return; + } + const ss::sstring license_str{sample_valid_license}; + const auto license = security::make_license(license_str); + + auto expiry = security::license::clock::time_point{4813252273s}; + + BOOST_CHECK_EQUAL(ft::calculate_expiry_metric(license, expiry - 1s), 1); + BOOST_CHECK_EQUAL(ft::calculate_expiry_metric(license, expiry), 0); + BOOST_CHECK_EQUAL(ft::calculate_expiry_metric(license, expiry + 1s), 0); + BOOST_CHECK_EQUAL(ft::calculate_expiry_metric(std::nullopt), -1); +} From c44d9d8f53222dc4c50063cafc8ff020dc4e586d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gell=C3=A9rt=20Peresztegi-Nagy?= Date: Thu, 19 Sep 2024 21:16:30 +0100 Subject: [PATCH 4/5] f/test: add bazel build Adds a bazel build for the features module. The include for base/vlog.h was unused, so it was removed instead of adding a dependency. --- src/v/features/tests/BUILD | 17 +++++++++++++++++ src/v/features/tests/feature_table_test.cc | 1 - 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 src/v/features/tests/BUILD diff --git a/src/v/features/tests/BUILD b/src/v/features/tests/BUILD new file mode 100644 index 0000000000000..a9db134a2eeee --- /dev/null +++ b/src/v/features/tests/BUILD @@ -0,0 +1,17 @@ +load("//bazel:test.bzl", "redpanda_cc_btest") + +redpanda_cc_btest( + name = "feature_table_test", + timeout = "short", + srcs = [ + "feature_table_test.cc", + ], + deps = [ + "//src/v/cluster:features", + "//src/v/features", + "//src/v/security:license", + "//src/v/test_utils:seastar_boost", + "@seastar", + "@seastar//:testing", + ], +) diff --git a/src/v/features/tests/feature_table_test.cc b/src/v/features/tests/feature_table_test.cc index 5417dca3e18c7..ef8e32cbbd869 100644 --- a/src/v/features/tests/feature_table_test.cc +++ b/src/v/features/tests/feature_table_test.cc @@ -9,7 +9,6 @@ * by the Apache License, Version 2.0 */ -#include "base/vlog.h" #include "cluster/feature_update_action.h" #include "features/feature_table.h" #include "features/feature_table_snapshot.h" From baece2ccd6c9986a85c1af108dbb5b8387e85f1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gell=C3=A9rt=20Peresztegi-Nagy?= Date: Wed, 18 Sep 2024 17:07:57 +0100 Subject: [PATCH 5/5] dt: test enterprise_license_expiry_sec metric Adds an integration test for the license expiry metric. --- tests/rptest/tests/rpk_cluster_test.py | 35 +++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/tests/rptest/tests/rpk_cluster_test.py b/tests/rptest/tests/rpk_cluster_test.py index 5bdfc6e37a3bf..2afc6b19442eb 100644 --- a/tests/rptest/tests/rpk_cluster_test.py +++ b/tests/rptest/tests/rpk_cluster_test.py @@ -15,7 +15,7 @@ import json from rptest.services.cluster import cluster -from rptest.services.redpanda import RESTART_LOG_ALLOW_LIST +from rptest.services.redpanda import RESTART_LOG_ALLOW_LIST, MetricSamples, MetricsEndpoint from rptest.util import expect_exception, get_cluster_license, get_second_cluster_license from ducktape.utils.util import wait_until from rptest.util import wait_until_result @@ -209,6 +209,25 @@ def test_cluster_down(self): else: assert False, f"Unexpected success: '{r}'" + def _get_license_expiry(self) -> int: + METRICS_NAME = "cluster_features_enterprise_license_expiry_sec" + + def get_metrics_value(metrics_endpoint: MetricsEndpoint) -> int: + metrics = self.redpanda.metrics_sample( + sample_pattern=METRICS_NAME, metrics_endpoint=metrics_endpoint) + assert isinstance(metrics, MetricSamples), \ + f'Failed to get metrics for {METRICS_NAME}' + + samples = [sample for sample in metrics.samples] + assert len(samples) == len(self.redpanda.nodes), \ + f'Invalid number of samples: {len(samples)}' + return int(samples[0].value) + + internal_val = get_metrics_value(MetricsEndpoint.METRICS) + public_val = get_metrics_value(MetricsEndpoint.PUBLIC_METRICS) + assert internal_val == public_val, f"Mismatch: {internal_val} != {public_val}" + return internal_val + @cluster(num_nodes=3) def test_upload_and_query_cluster_license_rpk(self): """ @@ -221,6 +240,12 @@ def test_upload_and_query_cluster_license_rpk(self): "Skipping test, REDPANDA_SAMPLE_LICENSE env var not found") return + wait_until(lambda: self._get_license_expiry() == -1, + timeout_sec=10, + backoff_sec=1, + retry_on_exc=True, + err_msg="Unset license should return a -1 expiry") + with tempfile.NamedTemporaryFile() as tf: tf.write(bytes(license, 'UTF-8')) tf.seek(0) @@ -238,6 +263,14 @@ def obtain_license(): retry_on_exc=True, err_msg="unable to retrieve license information") + wait_until( + lambda: self._get_license_expiry() > 0, + timeout_sec=10, + backoff_sec=1, + retry_on_exc=True, + err_msg="The expiry metric should be positive with a valid license" + ) + expected_license = { 'expires': "Jul 11 2122",