kedacore · zroubalik · Dec 18, 2023 · Dec 16, 2023
@@ -107,6 +107,7 @@ New deprecation(s):
 - **General**: Fix logger in Opentelemetry collector ([#5094](https://github.com/kedacore/keda/issues/5094))
 - **General**: Reduce amount of gauge creations for OpenTelemetry metrics ([#5101](https://github.com/kedacore/keda/issues/5101))
 - **General**: Support profiling for KEDA components ([#4789](https://github.com/kedacore/keda/issues/4789))
+- **CPU scaler**: Wait for metrics window during CPU scaler tests ([#5294](https://github.com/kedacore/keda/pull/5294))
 - **Hashicorp Vault**: Improve test coverage in `pkg/scaling/resolver/hashicorpvault_handler`  ([#5195](https://github.com/kedacore/keda/issues/5195))
 - **Openstack Scaler**: Use Gophercloud SDK ([#3439](https://github.com/kedacore/keda/issues/3439))
 

@@ -10,6 +10,7 @@ import (
 	"crypto/rsa"
 	"crypto/x509"
 	"crypto/x509/pkix"
+	"encoding/json"
 	"encoding/pem"
 	"fmt"
 	"io"
@@ -403,6 +404,33 @@ func WaitForAllPodRunningInNamespace(t *testing.T, kc *kubernetes.Clientset, nam
 	return false
 }
 
+// WaitForHPAMetricsToPopulate polls the Horizontal Pod Autoscaler called name in namespace until its
+// status reports at least one current metric with a non-empty type, or until iterations polls have
+// been made, whichever happens first. It sleeps intervalSeconds seconds after every unsuccessful
+// poll. It returns true as soon as such a metric is seen and false if the polls run out.
+func WaitForHPAMetricsToPopulate(t *testing.T, kc *kubernetes.Clientset, name, namespace string,
+	iterations, intervalSeconds int) bool {
+	interval := time.Duration(intervalSeconds) * time.Second
+	totalWaitDuration := time.Duration(iterations) * interval
+	startedWaiting := time.Now()
+	for i := 0; i < iterations; i++ {
+		t.Logf("Waiting up to %s for HPA to populate metrics - %s so far", totalWaitDuration, time.Since(startedWaiting).Round(time.Second))
+
+		hpa, err := kc.AutoscalingV2().HorizontalPodAutoscalers(namespace).Get(context.Background(), name, metav1.GetOptions{})
+		if err != nil {
+			// Keep polling on failure, but surface the error so that a wrong name or a missing
+			// permission does not look like "metrics are just slow" for the whole wait.
+			t.Logf("Unable to get HPA %s/%s: %v", namespace, name, err)
+			time.Sleep(interval)
+			continue
+		}
+
+		// Ranging over a nil slice is a no-op, so no separate nil check is needed.
+		for _, currentMetric := range hpa.Status.CurrentMetrics {
+			// When testing on a kind cluster at least, an empty metricStatus object with a blank type shows up first,
+			// so we need to make sure we have *actual* resource metrics before we return
+			if currentMetric.Type == "" {
+				continue
+			}
+			j, err := json.MarshalIndent(hpa.Status.CurrentMetrics, "  ", "    ")
+			if err != nil {
+				// The JSON rendering is only for the log; the metrics themselves are present.
+				t.Logf("HPA has metrics after %s (could not render them as JSON: %v)", time.Since(startedWaiting), err)
+				return true
+			}
+			t.Logf("HPA has metrics after %s: %s", time.Since(startedWaiting), j)
+			return true
+		}
+
+		time.Sleep(interval)
+	}
+	return false
+}
+
 // Waits until deployment ready replica count hits target or number of iterations are done.
 func WaitForDeploymentReplicaReadyCount(t *testing.T, kc *kubernetes.Clientset, name, namespace string,
 	target, iterations, intervalSeconds int) bool {

@@ -9,6 +9,7 @@ import (
 
 	"github.com/joho/godotenv"
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 	"k8s.io/client-go/kubernetes"
 
 	. "github.com/kedacore/keda/v2/tests/helper"
@@ -28,6 +29,7 @@ var (
 	testNamespace          = fmt.Sprintf("%s-ns", testName)
 	deploymentName         = fmt.Sprintf("%s-deployment", testName)
 	scaledObjectName       = fmt.Sprintf("%s-so", testName)
+	hpaName                = fmt.Sprintf("keda-hpa-%s-so", testName)
 )
 
 type templateData struct {
@@ -197,6 +199,14 @@ func scaleOut(t *testing.T, kc *kubernetes.Clientset, data templateData) {
 	assert.True(t, WaitForDeploymentReplicaReadyCount(t, kc, deploymentName, testNamespace, 1, 60, 1),
 		"Replica count should start out as 1")
 
+	// The default metrics-server window is 30s, which is what KEDA's tests normally assume, but some platforms use
+	// something like prometheus-adapter with the window tuned to a larger value, say 5m. In that case it takes 5 minutes
+	// before the HPA can even start scaling, so this test fails unless we wait for the metrics before we start.
+	// We could read the window straight from the metrics-server config, but that would require knowing too much about
+	// unusual configurations, so we just wait up to 10 minutes for the metrics (wherever they come from) before proceeding.
+	require.True(t, WaitForHPAMetricsToPopulate(t, kc, hpaName, testNamespace, 120, 5),
+		"HPA should populate metrics within 10 minutes")
+
 	t.Log("--- testing scale out ---")
 	t.Log("--- applying job ---")