From 0a524fb5607f3b8c49f1176329c6a958badc1a1b Mon Sep 17 00:00:00 2001 From: John Kyros Date: Fri, 15 Dec 2023 18:21:38 -0600 Subject: [PATCH] Make cpu scaler test wait for metrics window The CPU scaler test assumes a default metrics window of 30s, so those testing on platforms where it is set to a larger value will potentially fail the CPU scaler test because the metrics won't be ready by the time the test starts. This: - Adds a helper that waits for either the metrics to show up in the HPA, or for some amount of time to pass, whichever happens first - Uses said helper to ensure that the metrics are ready before the CPU test starts testing scaling Signed-off-by: John Kyros --- CHANGELOG.md | 1 + tests/helper/helper.go | 28 ++++++++++++++++++++++++++++ tests/scalers/cpu/cpu_test.go | 10 ++++++++++ 3 files changed, 39 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 508ce44ade3..2c825b7b046 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -107,6 +107,7 @@ New deprecation(s): - **General**: Fix logger in Opentelemetry collector ([#5094](https://github.com/kedacore/keda/issues/5094)) - **General**: Reduce amount of gauge creations for OpenTelemetry metrics ([#5101](https://github.com/kedacore/keda/issues/5101)) - **General**: Support profiling for KEDA components ([#4789](https://github.com/kedacore/keda/issues/4789)) +- **CPU scaler**: Wait for metrics window during CPU scaler tests ([#5294](https://github.com/kedacore/keda/pull/5294)) - **Hashicorp Vault**: Improve test coverage in `pkg/scaling/resolver/hashicorpvault_handler` ([#5195](https://github.com/kedacore/keda/issues/5195)) - **Openstack Scaler**: Use Gophercloud SDK ([#3439](https://github.com/kedacore/keda/issues/3439)) diff --git a/tests/helper/helper.go b/tests/helper/helper.go index 78d264da7b7..e0c8f734cc7 100644 --- a/tests/helper/helper.go +++ b/tests/helper/helper.go @@ -10,6 +10,7 @@ import ( "crypto/rsa" "crypto/x509" "crypto/x509/pkix" + "encoding/json" "encoding/pem" "fmt" "io" @@ 
-403,6 +404,46 @@ func WaitForAllPodRunningInNamespace(t *testing.T, kc *kubernetes.Clientset, nam
 	return false
 }
 
+// WaitForHPAMetricsToPopulate waits until the Horizontal Pod Autoscaler for the scaledObject reports that it has
+// metrics available to calculate, or until the number of iterations is done, whichever happens first.
+// It looks up the HPA called name in namespace up to iterations times, sleeping intervalSeconds after each attempt,
+// and returns true as soon as a metric with a non-blank type is reported, or false if none ever shows up.
+func WaitForHPAMetricsToPopulate(t *testing.T, kc *kubernetes.Clientset, name, namespace string,
+	iterations, intervalSeconds int) bool {
+	interval := time.Duration(intervalSeconds) * time.Second
+	totalWaitDuration := time.Duration(iterations) * interval
+	startedWaiting := time.Now()
+	for i := 0; i < iterations; i++ {
+		t.Logf("Waiting up to %s for HPA to populate metrics - %s so far", totalWaitDuration, time.Since(startedWaiting).Round(time.Second))
+
+		hpa, err := kc.AutoscalingV2().HorizontalPodAutoscalers(namespace).Get(context.Background(), name, metav1.GetOptions{})
+		if err != nil {
+			// A failed lookup (for example because the HPA does not exist yet) is logged and retried on the next
+			// iteration rather than being silently discarded.
+			t.Logf("Unable to get HPA %s in namespace %s: %s", name, namespace, err)
+		} else {
+			for _, currentMetric := range hpa.Status.CurrentMetrics {
+				// When testing on a kind cluster at least, an empty metricStatus object with a blank type shows up first,
+				// so we need to make sure we have *actual* resource metrics before we return
+				if currentMetric.Type == "" {
+					continue
+				}
+				j, marshalErr := json.MarshalIndent(hpa.Status.CurrentMetrics, " ", " ")
+				if marshalErr != nil {
+					// The JSON dump is only for the log; fall back to Go formatting if it cannot be produced.
+					t.Logf("HPA has metrics after %s: %+v", time.Since(startedWaiting), hpa.Status.CurrentMetrics)
+				} else {
+					t.Logf("HPA has metrics after %s: %s", time.Since(startedWaiting), j)
+				}
+				return true
+			}
+		}
+
+		time.Sleep(interval)
+	}
+	return false
+}
+
 // Waits until deployment ready replica count hits target or number of iterations are done.
func WaitForDeploymentReplicaReadyCount(t *testing.T, kc *kubernetes.Clientset, name, namespace string, target, iterations, intervalSeconds int) bool { diff --git a/tests/scalers/cpu/cpu_test.go b/tests/scalers/cpu/cpu_test.go index 1f1000ad019..f24922dc61d 100644 --- a/tests/scalers/cpu/cpu_test.go +++ b/tests/scalers/cpu/cpu_test.go @@ -9,6 +9,7 @@ import ( "github.com/joho/godotenv" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "k8s.io/client-go/kubernetes" . "github.com/kedacore/keda/v2/tests/helper" @@ -28,6 +29,7 @@ var ( testNamespace = fmt.Sprintf("%s-ns", testName) deploymentName = fmt.Sprintf("%s-deployment", testName) scaledObjectName = fmt.Sprintf("%s-so", testName) + hpaName = fmt.Sprintf("keda-hpa-%s-so", testName) ) type templateData struct { @@ -197,6 +199,14 @@ func scaleOut(t *testing.T, kc *kubernetes.Clientset, data templateData) { assert.True(t, WaitForDeploymentReplicaReadyCount(t, kc, deploymentName, testNamespace, 1, 60, 1), "Replica count should start out as 1") + // The default metrics-server window is 30s, and that's what keda is used to, but some platforms use things like + // prometheus-adapter, and have the window tuned to a larger window of say 5m. In that case it takes 5 minutes before + // the HPA can even start scaling, and as a result we'll fail this test unless we wait for the metrics before we start. + // We'd read the window straight from the metrics-server config, but we'd have to know too much about unusual configurations, + // so we just wait up to 10 minutes for the metrics (wherever they're coming from) before we proceed with the test. + require.True(t, WaitForHPAMetricsToPopulate(t, kc, hpaName, testNamespace, 120, 5), + "HPA should populate metrics within 10 minutes") + t.Log("--- testing scale out ---") t.Log("--- applying job ---")