From 443dcea609626072c1ab67a63f2e21151008a822 Mon Sep 17 00:00:00 2001 From: John Kyros Date: Wed, 31 Jan 2024 15:54:59 -0600 Subject: [PATCH] UPSTREAM: 114358: Default missing fields in HPA behaviors The description in the autoscaling API for the HorizontalPodAutoscaler suggests that HorizontalPodAutoscalerSpec's Behavior field (and its ScaleUp and ScaleDown members) are optional. And that if not supplied, defaults will be used. That's true if the entire Behavior is nil because, we go through "normalizeDesiredReplicas" instead of "normalizeDesiredReplicasWithBehaviors", but if the structure is only partially supplied, leaving some members nil, it results in nil dereferences when we end up going though normalizeDesiredReplicasWithBehaviors. So we end up in a situation where: - If Behavior is entirely absent (nil) we use defaults (good) - If Behavior is partially specified we panic (very bad) - If stabilizationWindowSeconds is nil in either ScaleUp or Scaledown, we panic (also very bad) In general, this only happens with pre-v2 HPA objects because v2 does properly fill in the default values. This commit prevents the panic by using the defaulters to ensure that unpopulated fields in the behavior objects get filled in with their defaults before they are used, so they can safely be dereferenced by later code that performs calculations on them. --- pkg/controller/podautoscaler/horizontal.go | 16 +++++++ .../podautoscaler/horizontal_test.go | 46 ++++++++++++++++++- 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/pkg/controller/podautoscaler/horizontal.go b/pkg/controller/podautoscaler/horizontal.go index 7e8a84ed3dcd0..38767dfd46828 100644 --- a/pkg/controller/podautoscaler/horizontal.go +++ b/pkg/controller/podautoscaler/horizontal.go @@ -47,6 +47,7 @@ import ( "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" + autoscalingapiv2 "k8s.io/kubernetes/pkg/apis/autoscaling/v2" "k8s.io/kubernetes/pkg/controller" metricsclient "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics" ) @@ -575,6 +576,11 @@ func (a *HorizontalController) reconcileAutoscaler(ctx context.Context, hpaShare hpa := hpaShared.DeepCopy() hpaStatusOriginal := hpa.Status.DeepCopy() + // Make sure we don't have any nil behaviors before we touch them. They can come in with old v1 objects, + // and it's easier here than having to nil check/default later for every calculation + // See: https://issues.redhat.com/browse/OCPBUGS-12210 + a.ensureScalingBehaviorsNotPartiallyPopulated(hpa) + reference := fmt.Sprintf("%s/%s/%s", hpa.Spec.ScaleTargetRef.Kind, hpa.Namespace, hpa.Spec.ScaleTargetRef.Name) targetGV, err := schema.ParseGroupVersion(hpa.Spec.ScaleTargetRef.APIVersion) @@ -1211,3 +1217,13 @@ func min(a, b int32) int32 { } return b } + +// ensureScalingBehaviorsNotPartiallyPopulated uses the defualters to ensure that scaling behaviors cannot +// be partially populated, because if they are, the calculation code will potentially dereference the +// nil pointers and panic. +func (a *HorizontalController) ensureScalingBehaviorsNotPartiallyPopulated(hpa *autoscalingv2.HorizontalPodAutoscaler) { + if hpa.Spec.Behavior != nil { + hpa.Spec.Behavior.ScaleUp = autoscalingapiv2.GenerateHPAScaleUpRules(hpa.Spec.Behavior.ScaleUp) + hpa.Spec.Behavior.ScaleDown = autoscalingapiv2.GenerateHPAScaleDownRules(hpa.Spec.Behavior.ScaleDown) + } +} diff --git a/pkg/controller/podautoscaler/horizontal_test.go b/pkg/controller/podautoscaler/horizontal_test.go index de095c159f52d..0fb7303027354 100644 --- a/pkg/controller/podautoscaler/horizontal_test.go +++ b/pkg/controller/podautoscaler/horizontal_test.go @@ -226,8 +226,11 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa LastScaleTime: tc.lastScaleTime, }, } + // jkyros: we were cheating our way through our tests by defaulting these here, we would + // have caught the "partially filled HPA behaviors" crashes a lot sooner otherwise + // Initialize default values - autoscalingapiv2.SetDefaults_HorizontalPodAutoscalerBehavior(&hpa) + // autoscalingapiv2.SetDefaults_HorizontalPodAutoscalerBehavior(&hpa) obj := &autoscalingv2.HorizontalPodAutoscalerList{ Items: []autoscalingv2.HorizontalPodAutoscaler{hpa}, @@ -4180,3 +4183,44 @@ func TestNoScaleDownOneMetricEmpty(t *testing.T) { tc.testEMClient = testEMClient tc.runTest(t) } + +// TestPartialBehaviors makes sure that the controller is filling in any +// partial behaviors that are nil so we don't panic +// See: https://issues.redhat.com/browse/OCPBUGS-12210 +func TestPartialBehaviors(t *testing.T) { + + t.Run("ensure no panic if scaleDown rules are missing", func(t *testing.T) { + tc := testCase{ + minReplicas: 2, + maxReplicas: 6, + specReplicas: 3, + statusReplicas: 3, + expectedDesiredReplicas: 5, + CPUTarget: 30, + verifyCPUCurrent: true, + reportedLevels: []uint64{300, 500, 700}, + reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + useMetricsAPI: true, + scaleUpRules: autoscalingapiv2.GenerateHPAScaleUpRules(nil), + } + tc.runTest(t) + }) + + t.Run("ensure no panic if scaleUp rules are missing", func(t *testing.T) { + tc := testCase{ + minReplicas: 2, + maxReplicas: 6, + specReplicas: 5, + statusReplicas: 5, + expectedDesiredReplicas: 3, + CPUTarget: 50, + verifyCPUCurrent: true, + reportedLevels: []uint64{100, 300, 500, 250, 250}, + reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + useMetricsAPI: true, + recommendations: []timestampedRecommendation{}, + scaleDownRules: autoscalingapiv2.GenerateHPAScaleDownRules(nil), + } + tc.runTest(t) + }) +}