From 0e0f66f848e7a50630a64340bc0d1ff33c10087a Mon Sep 17 00:00:00 2001 From: John Kyros Date: Wed, 31 Jan 2024 15:54:59 -0600 Subject: [PATCH] UPSTREAM: 114358: Default missing fields in HPA behaviors The description in the autoscaling API for the HorizontalPodAutoscaler suggests that HorizontalPodAutoscalerSpec's Behavior field (and its ScaleUp and ScaleDown members) are optional. And that if not supplied, defaults will be used. That's true if the entire Behavior is nil because, we go through "normalizeDesiredReplicas" instead of "normalizeDesiredReplicasWithBehaviors", but if the structure is only partially supplied, leaving some members nil, it results in nil dereferences when we end up going though normalizeDesiredReplicasWithBehaviors. So we end up in a situation where: - If Behavior is entirely absent (nil) we use defaults (good) - If Behavior is partially specified we panic (very bad) - If stabilizationWindowSeconds is nil in either ScaleUp or Scaledown, we panic (also very bad) In general, this only happens with pre-v2 HPA objects because v2 does properly fill in the default values. This commit prevents the panic by using the defaulters to ensure that unpopulated fields in the behavior objects get filled in with their defaults before they are used, so they can safely be dereferenced by later code that performs calculations on them. --- pkg/controller/podautoscaler/horizontal.go | 16 +++++++ .../podautoscaler/horizontal_test.go | 46 ++++++++++++++++++- 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/pkg/controller/podautoscaler/horizontal.go b/pkg/controller/podautoscaler/horizontal.go index 2a039962511ca..e006c631cb81b 100644 --- a/pkg/controller/podautoscaler/horizontal.go +++ b/pkg/controller/podautoscaler/horizontal.go @@ -48,6 +48,7 @@ import ( "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" + autoscalingapiv2 "k8s.io/kubernetes/pkg/apis/autoscaling/v2" "k8s.io/kubernetes/pkg/controller" metricsclient "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics" "k8s.io/kubernetes/pkg/controller/util/selectors" @@ -679,6 +680,11 @@ func (a *HorizontalController) reconcileAutoscaler(ctx context.Context, hpaShare hpa := hpaShared.DeepCopy() hpaStatusOriginal := hpa.Status.DeepCopy() + // Make sure we don't have any nil behaviors before we touch them. They can come in with old v1 objects, + // and it's easier here than having to nil check/default later for every calculation + // See: https://issues.redhat.com/browse/OCPBUGS-12210 + a.ensureScalingBehaviorsNotPartiallyPopulated(hpa) + reference := fmt.Sprintf("%s/%s/%s", hpa.Spec.ScaleTargetRef.Kind, hpa.Namespace, hpa.Spec.ScaleTargetRef.Name) targetGV, err := schema.ParseGroupVersion(hpa.Spec.ScaleTargetRef.APIVersion) @@ -1342,3 +1348,13 @@ func min(a, b int32) int32 { } return b } + +// ensureScalingBehaviorsNotPartiallyPopulated uses the defualters to ensure that scaling behaviors cannot +// be partially populated, because if they are, the calculation code will potentially dereference the +// nil pointers and panic. +func (a *HorizontalController) ensureScalingBehaviorsNotPartiallyPopulated(hpa *autoscalingv2.HorizontalPodAutoscaler) { + if hpa.Spec.Behavior != nil { + hpa.Spec.Behavior.ScaleUp = autoscalingapiv2.GenerateHPAScaleUpRules(hpa.Spec.Behavior.ScaleUp) + hpa.Spec.Behavior.ScaleDown = autoscalingapiv2.GenerateHPAScaleDownRules(hpa.Spec.Behavior.ScaleDown) + } +} diff --git a/pkg/controller/podautoscaler/horizontal_test.go b/pkg/controller/podautoscaler/horizontal_test.go index 407e6fe0da0ff..1c7b206b49198 100644 --- a/pkg/controller/podautoscaler/horizontal_test.go +++ b/pkg/controller/podautoscaler/horizontal_test.go @@ -229,8 +229,11 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa LastScaleTime: tc.lastScaleTime, }, } + // jkyros: we were cheating our way through our tests by defaulting these here, we would + // have caught the "partially filled HPA behaviors" crashes a lot sooner otherwise + // Initialize default values - autoscalingapiv2.SetDefaults_HorizontalPodAutoscalerBehavior(&hpa) + // autoscalingapiv2.SetDefaults_HorizontalPodAutoscalerBehavior(&hpa) obj := &autoscalingv2.HorizontalPodAutoscalerList{ Items: []autoscalingv2.HorizontalPodAutoscaler{hpa}, @@ -4559,3 +4562,44 @@ func TestMultipleHPAs(t *testing.T) { assert.Equal(t, hpaCount, len(processedHPA), "Expected to process all HPAs") } + +// TestPartialBehaviors makes sure that the controller is filling in any +// partial behaviors that are nil so we don't panic +// See: https://issues.redhat.com/browse/OCPBUGS-12210 +func TestPartialBehaviors(t *testing.T) { + + t.Run("ensure no panic if scaleDown rules are missing", func(t *testing.T) { + tc := testCase{ + minReplicas: 2, + maxReplicas: 6, + specReplicas: 3, + statusReplicas: 3, + expectedDesiredReplicas: 5, + CPUTarget: 30, + verifyCPUCurrent: true, + reportedLevels: []uint64{300, 500, 700}, + reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + useMetricsAPI: true, + scaleUpRules: autoscalingapiv2.GenerateHPAScaleUpRules(nil), + } + tc.runTest(t) + }) + + t.Run("ensure no panic if scaleUp rules are missing", func(t *testing.T) { + tc := testCase{ + minReplicas: 2, + maxReplicas: 6, + specReplicas: 5, + statusReplicas: 5, + expectedDesiredReplicas: 3, + CPUTarget: 50, + verifyCPUCurrent: true, + reportedLevels: []uint64{100, 300, 500, 250, 250}, + reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + useMetricsAPI: true, + recommendations: []timestampedRecommendation{}, + scaleDownRules: autoscalingapiv2.GenerateHPAScaleDownRules(nil), + } + tc.runTest(t) + }) +}