From d58d13249f6aa9e5e651df7754ba3258c6603cc5 Mon Sep 17 00:00:00 2001 From: Daniela Plascencia Date: Fri, 23 Feb 2024 12:57:12 +0100 Subject: [PATCH] tests: add a retry when asserting the up metric (#243) * tests: add a retry when asserting the up metric Adding a retry when checking the state of an alert gives prometheus-k8s time to scrape the necessary metrics for a unit. Without it, we may run into a race condition where the assertion on the metric runs before Prometheus is even able to scrape. This commit adds retry logic to avoid this. Fixes #244 --- tests/integration/test_charm.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_charm.py b/tests/integration/test_charm.py index d30ffdc..14e1036 100644 --- a/tests/integration/test_charm.py +++ b/tests/integration/test_charm.py @@ -164,6 +164,20 @@ async def check_alert_propagation(url, alert_name): assert alert_rule is not None and alert_rule["state"] == "firing" +@tenacity.retry(wait=tenacity.wait_fixed(30), stop=tenacity.stop_after_attempt(6), reraise=True) +async def assert_seldon_unit_is_available(prometheus_url): + """Assert the unit is available. + + This will be tried multiple times to avoid raising errors when prometheus-k8s + is not in an Active status. 
+ """ + # query for the up metric and assert the unit is available + up_query_response = await fetch_url( + f'http://{prometheus_url}:9090/api/v1/query?query=up{{juju_application="{APP_NAME}"}}' + ) + assert up_query_response["data"]["result"][0]["value"][1] == "1" + + @pytest.mark.abort_on_fail @pytest.mark.asyncio async def test_seldon_alert_rules(ops_test: OpsTest): @@ -197,12 +211,6 @@ async def test_seldon_alert_rules(ops_test: OpsTest): discovered_labels = targets_result["data"]["activeTargets"][0]["discoveredLabels"] assert discovered_labels["juju_application"] == "seldon-controller-manager" - # query for the up metric and assert the unit is available - up_query_response = await fetch_url( - f'http://{prometheus_url}:9090/api/v1/query?query=up{{juju_application="{APP_NAME}"}}' - ) - assert up_query_response["data"]["result"][0]["value"][1] == "1" - # obtain alert rules from Prometheus rules_url = f"http://{prometheus_url}:9090/api/v1/rules" alert_rules_result = await fetch_url(rules_url) @@ -228,6 +236,9 @@ async def test_seldon_alert_rules(ops_test: OpsTest): for rule in rules: assert rule["name"] in rules_file_alert_names + # verify SeldonUnitIsUnavailable alert is not firing + await assert_seldon_unit_is_available(prometheus_url) + # The following integration test is optional (experimental) and might not be functioning # correctly under some conditions due to its reliance on timing of K8S deployments, timing of # Prometheus scraping, and rate calculations for alerts.