Add mp to autoscaler e2e test

kubernetes-sigs · Mar 26, 2024 · a3c05f6 · a3c05f6
1 parent 7eea1d9
commit a3c05f6
Show file tree

Hide file tree

Showing 7 changed files with 442 additions and 54 deletions.
diff --git a/exp/internal/webhooks/machinepool.go b/exp/internal/webhooks/machinepool.go
@@ -19,8 +19,11 @@ package webhooks
 import (
 	"context"
 	"fmt"
+	"strconv"
 	"strings"
 
+	"github.com/pkg/errors"
+	v1 "k8s.io/api/admission/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/util/validation/field"
@@ -36,6 +39,10 @@ import (
 )
 
 func (webhook *MachinePool) SetupWebhookWithManager(mgr ctrl.Manager) error {
+	if webhook.decoder == nil {
+		webhook.decoder = admission.NewDecoder(mgr.GetScheme())
+	}
+
 	return ctrl.NewWebhookManagedBy(mgr).
 		For(&expv1.MachinePool{}).
 		WithDefaulter(webhook).
@@ -47,27 +54,48 @@ func (webhook *MachinePool) SetupWebhookWithManager(mgr ctrl.Manager) error {
 // +kubebuilder:webhook:verbs=create;update,path=/mutate-cluster-x-k8s-io-v1beta1-machinepool,mutating=true,failurePolicy=fail,matchPolicy=Equivalent,groups=cluster.x-k8s.io,resources=machinepools,versions=v1beta1,name=default.machinepool.cluster.x-k8s.io,sideEffects=None,admissionReviewVersions=v1;v1beta1
 
 // MachinePool implements a validation and defaulting webhook for MachinePool.
-type MachinePool struct{}
+type MachinePool struct {
+	decoder *admission.Decoder
+}
 
 var _ webhook.CustomValidator = &MachinePool{}
 var _ webhook.CustomDefaulter = &MachinePool{}
 
 // Default implements webhook.Defaulter so a webhook will be registered for the type.
-func (webhook *MachinePool) Default(_ context.Context, obj runtime.Object) error {
+func (webhook *MachinePool) Default(ctx context.Context, obj runtime.Object) error {
 	m, ok := obj.(*expv1.MachinePool)
 	if !ok {
 		return apierrors.NewBadRequest(fmt.Sprintf("expected a MachinePool but got a %T", obj))
 	}
 
+	req, err := admission.RequestFromContext(ctx)
+	if err != nil {
+		return err
+	}
+	dryRun := false
+	if req.DryRun != nil {
+		dryRun = *req.DryRun
+	}
+	var oldMP *expv1.MachinePool
+	if req.Operation == v1.Update {
+		oldMP = &expv1.MachinePool{}
+		if err := webhook.decoder.DecodeRaw(req.OldObject, oldMP); err != nil {
+			return errors.Wrapf(err, "failed to decode oldObject to MachinePool")
+		}
+	}
+
 	if m.Labels == nil {
 		m.Labels = make(map[string]string)
 	}
 	m.Labels[clusterv1.ClusterNameLabel] = m.Spec.ClusterName
 
-	if m.Spec.Replicas == nil {
-		m.Spec.Replicas = ptr.To[int32](1)
+	replicas, err := calculateMachinePoolReplicas(ctx, oldMP, m, dryRun)
+	if err != nil {
+		return err
 	}
 
+	m.Spec.Replicas = ptr.To[int32](replicas)
+
 	if m.Spec.MinReadySeconds == nil {
 		m.Spec.MinReadySeconds = ptr.To[int32](0)
 	}
@@ -187,3 +215,75 @@ func (webhook *MachinePool) validate(oldObj, newObj *expv1.MachinePool) error {
 	}
 	return apierrors.NewInvalid(clusterv1.GroupVersion.WithKind("MachinePool").GroupKind(), newObj.Name, allErrs)
 }
+
+func calculateMachinePoolReplicas(ctx context.Context, oldMP *expv1.MachinePool, newMP *expv1.MachinePool, dryRun bool) (int32, error) {
+	// If replicas is already set => Keep the current value.
+	if newMP.Spec.Replicas != nil {
+		return *newMP.Spec.Replicas, nil
+	}
+
+	log := ctrl.LoggerFrom(ctx)
+
+	// If both autoscaler annotations are set, use them to calculate the default value.
+	minSizeString, hasMinSizeAnnotation := newMP.Annotations[clusterv1.AutoscalerMinSizeAnnotation]
+	maxSizeString, hasMaxSizeAnnotation := newMP.Annotations[clusterv1.AutoscalerMaxSizeAnnotation]
+	if hasMinSizeAnnotation && hasMaxSizeAnnotation {
+		minSize, err := strconv.ParseInt(minSizeString, 10, 32)
+		if err != nil {
+			return 0, errors.Wrapf(err, "failed to calculate MachinePool replicas value: could not parse the value of the %q annotation", clusterv1.AutoscalerMinSizeAnnotation)
+		}
+		maxSize, err := strconv.ParseInt(maxSizeString, 10, 32)
+		if err != nil {
+			return 0, errors.Wrapf(err, "failed to calculate MachinePool replicas value: could not parse the value of the %q annotation", clusterv1.AutoscalerMaxSizeAnnotation)
+		}
+
+		// If it's a new MachinePool => Use the min size.
+		// Note: This will result in a scale up to get into the range where autoscaler takes over.
+		if oldMP == nil {
+			if !dryRun {
+				log.V(2).Info(fmt.Sprintf("Replica field has been defaulted to %d based on the %s annotation (MP is a new MP)", minSize, clusterv1.AutoscalerMinSizeAnnotation))
+			}
+			return int32(minSize), nil
+		}
+
+		// Otherwise we are handing over the control for the replicas field for an existing MachinePool
+		// to the autoscaler.
+
+		switch {
+		// If the old MachinePool doesn't have replicas set => Use the min size.
+		// Note: As defaulting always sets the replica field, this case should not be possible.
+		// We only have this handling to be 100% safe against panics.
+		case oldMP.Spec.Replicas == nil:
+			if !dryRun {
+				log.V(2).Info(fmt.Sprintf("Replica field has been defaulted to %d based on the %s annotation (old MP didn't have replicas set)", minSize, clusterv1.AutoscalerMinSizeAnnotation))
+			}
+			return int32(minSize), nil
+		// If the old MachinePool replicas are lower than min size => Use the min size.
+		// Note: This will result in a scale up to get into the range where autoscaler takes over.
+		case *oldMP.Spec.Replicas < int32(minSize):
+			if !dryRun {
+				log.V(2).Info(fmt.Sprintf("Replica field has been defaulted to %d based on the %s annotation (old MP had replicas below min size)", minSize, clusterv1.AutoscalerMinSizeAnnotation))
+			}
+			return int32(minSize), nil
+		// If the old MachinePool replicas are higher than max size => Use the max size.
+		// Note: This will result in a scale down to get into the range where autoscaler takes over.
+		case *oldMP.Spec.Replicas > int32(maxSize):
+			if !dryRun {
+				log.V(2).Info(fmt.Sprintf("Replica field has been defaulted to %d based on the %s annotation (old MP had replicas above max size)", maxSize, clusterv1.AutoscalerMaxSizeAnnotation))
+			}
+			return int32(maxSize), nil
+		// If the old MachinePool replicas are between min and max size => Keep the current value.
+		default:
+			if !dryRun {
+				log.V(2).Info(fmt.Sprintf("Replica field has been defaulted to %d based on replicas of the old MachinePool (old MP had replicas within min size / max size range)", *oldMP.Spec.Replicas))
+			}
+			return *oldMP.Spec.Replicas, nil
+		}
+	}
+
+	// If replicas is not set and the autoscaler annotations are not both set => Default to 1.
+	if !dryRun {
+		log.V(2).Info("Replica field has been defaulted to 1")
+	}
+	return 1, nil
+}
diff --git a/exp/internal/webhooks/machinepool_test.go b/exp/internal/webhooks/machinepool_test.go
@@ -26,14 +26,15 @@ import (
 	utilfeature "k8s.io/component-base/featuregate/testing"
 	"k8s.io/utils/ptr"
 	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
 
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
 	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
 	"sigs.k8s.io/cluster-api/feature"
 	"sigs.k8s.io/cluster-api/internal/webhooks/util"
 )
 
-var ctx = ctrl.SetupSignalHandler()
+var ctx = admission.NewContextWithRequest(ctrl.SetupSignalHandler(), admission.Request{})
 
 func TestMachinePoolDefault(t *testing.T) {
 	// NOTE: MachinePool feature flag is disabled by default, thus preventing to create or update MachinePool.

diff --git a/test/e2e/autoscaler.go b/test/e2e/autoscaler.go
@@ -59,8 +59,9 @@ type AutoscalerSpecInput struct {
 	// InfrastructureMachineTemplateKind should be the plural form of the InfraMachineTemplate kind.
 	// It should be specified in lower case.
 	// Example: dockermachinetemplates.
-	InfrastructureMachineTemplateKind string
-	AutoscalerVersion                 string
+	InfrastructureMachineTemplateKind     string
+	InfrastructureMachinePoolTemplateKind string
+	AutoscalerVersion                     string
 
 	// Allows to inject a function to be run after test namespace is created.
 	// If not specified, this is a no-op.
@@ -71,11 +72,13 @@ type AutoscalerSpecInput struct {
 // being deployed in the workload cluster.
 func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput) {
 	var (
-		specName         = "autoscaler"
-		input            AutoscalerSpecInput
-		namespace        *corev1.Namespace
-		cancelWatches    context.CancelFunc
-		clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult
+		specName           = "autoscaler"
+		mpNodeGroupMinSize = "1"
+		mpNodeGroupMaxSize = "5"
+		input              AutoscalerSpecInput
+		namespace          *corev1.Namespace
+		cancelWatches      context.CancelFunc
+		clusterResources   *clusterctl.ApplyClusterTemplateAndWaitResult
 	)
 
 	BeforeEach(func() {
@@ -127,32 +130,40 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput)
 			WaitForClusterIntervals:      input.E2EConfig.GetIntervals(specName, "wait-cluster"),
 			WaitForControlPlaneIntervals: input.E2EConfig.GetIntervals(specName, "wait-control-plane"),
 			WaitForMachineDeployments:    input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
+			WaitForMachinePools:          input.E2EConfig.GetIntervals(specName, "wait-machine-pool-nodes"),
 		}, clusterResources)
 
 		Expect(clusterResources.Cluster.Spec.Topology).NotTo(BeNil(), "Autoscaler test expected a Classy Cluster")
 		// Ensure the MachineDeploymentTopology has the autoscaler annotations.
 		mdTopology := clusterResources.Cluster.Spec.Topology.Workers.MachineDeployments[0]
 		Expect(mdTopology.Metadata.Annotations).NotTo(BeNil(), "MachineDeployment is expected to have autoscaler annotations")
-		nodeGroupMinSize, ok := mdTopology.Metadata.Annotations[clusterv1.AutoscalerMinSizeAnnotation]
+		mdNodeGroupMinSize, ok := mdTopology.Metadata.Annotations[clusterv1.AutoscalerMinSizeAnnotation]
 		Expect(ok).To(BeTrue(), "MachineDeploymentTopology %s does not have the %q autoscaler annotation", mdTopology.Name, clusterv1.AutoscalerMinSizeAnnotation)
-		nodeGroupMaxSize, ok := mdTopology.Metadata.Annotations[clusterv1.AutoscalerMaxSizeAnnotation]
+		mdNodeGroupMaxSize, ok := mdTopology.Metadata.Annotations[clusterv1.AutoscalerMaxSizeAnnotation]
 		Expect(ok).To(BeTrue(), "MachineDeploymentTopology %s does not have the %q autoscaler annotation", mdTopology.Name, clusterv1.AutoscalerMaxSizeAnnotation)
 
+		// Ensure the MachinePoolTopology does NOT have the autoscaler annotations so we can test MachineDeployments first.
+		mpTopology := clusterResources.Cluster.Spec.Topology.Workers.MachinePools[0]
+		Expect(mpTopology.Metadata.Annotations).To(BeNil(), "MachinePool is expected to not have autoscaler annotations")
+
 		// Get a ClusterProxy so we can interact with the workload cluster
 		workloadClusterProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, clusterResources.Cluster.Namespace, clusterResources.Cluster.Name)
-		originalReplicas := *clusterResources.MachineDeployments[0].Spec.Replicas
-		Expect(strconv.Itoa(int(originalReplicas))).To(Equal(nodeGroupMinSize), "MachineDeployment should have replicas as defined in %s", clusterv1.AutoscalerMinSizeAnnotation)
+		mdOriginalReplicas := *clusterResources.MachineDeployments[0].Spec.Replicas
+		Expect(strconv.Itoa(int(mdOriginalReplicas))).To(Equal(mdNodeGroupMinSize), "MachineDeployment should have replicas as defined in %s", clusterv1.AutoscalerMinSizeAnnotation)
+		mpOriginalReplicas := *clusterResources.MachinePools[0].Spec.Replicas
+		Expect(strconv.Itoa(int(mpOriginalReplicas))).To(Equal(mpNodeGroupMinSize), "MachinePool should have replicas as defined in %s", clusterv1.AutoscalerMinSizeAnnotation)
 
 		By("Installing the autoscaler on the workload cluster")
 		autoscalerWorkloadYAMLPath := input.E2EConfig.GetVariable(AutoscalerWorkloadYAMLPath)
 		framework.ApplyAutoscalerToWorkloadCluster(ctx, framework.ApplyAutoscalerToWorkloadClusterInput{
-			ArtifactFolder:                    input.ArtifactFolder,
-			InfrastructureMachineTemplateKind: input.InfrastructureMachineTemplateKind,
-			WorkloadYamlPath:                  autoscalerWorkloadYAMLPath,
-			ManagementClusterProxy:            input.BootstrapClusterProxy,
-			WorkloadClusterProxy:              workloadClusterProxy,
-			Cluster:                           clusterResources.Cluster,
-			AutoscalerVersion:                 input.AutoscalerVersion,
+			ArtifactFolder:                        input.ArtifactFolder,
+			InfrastructureMachineTemplateKind:     input.InfrastructureMachineTemplateKind,
+			InfrastructureMachinePoolTemplateKind: input.InfrastructureMachinePoolTemplateKind,
+			WorkloadYamlPath:                      autoscalerWorkloadYAMLPath,
+			ManagementClusterProxy:                input.BootstrapClusterProxy,
+			WorkloadClusterProxy:                  workloadClusterProxy,
+			Cluster:                               clusterResources.Cluster,
+			AutoscalerVersion:                     input.AutoscalerVersion,
 		}, input.E2EConfig.GetIntervals(specName, "wait-controllers")...)
 
 		By("Creating workload that forces the system to scale up")
@@ -161,38 +172,38 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput)
 		}, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...)
 
 		By("Checking the MachineDeployment is scaled up")
-		scaledUpReplicas := originalReplicas + 1
+		mdScaledUpReplicas := mdOriginalReplicas + 1
 		framework.AssertMachineDeploymentReplicas(ctx, framework.AssertMachineDeploymentReplicasInput{
 			Getter:                   input.BootstrapClusterProxy.GetClient(),
 			MachineDeployment:        clusterResources.MachineDeployments[0],
-			Replicas:                 scaledUpReplicas,
+			Replicas:                 mdScaledUpReplicas,
 			WaitForMachineDeployment: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
 		})
 
 		By("Disabling the autoscaler")
-		framework.DisableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.DisableAutoscalerForMachineDeploymentTopologyAndWaitInput{
+		framework.DisableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.DisableAutoscalerForMachineTopologyAndWaitInput{
 			ClusterProxy:                  input.BootstrapClusterProxy,
 			Cluster:                       clusterResources.Cluster,
 			WaitForAnnotationsToBeDropped: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
 		})
 
 		By("Checking we can manually scale up the MachineDeployment")
 		// Scale up the MachineDeployment. Since autoscaler is disabled we should be able to do this.
-		excessReplicas := scaledUpReplicas + 1
+		mdExcessReplicas := mdScaledUpReplicas + 1
 		framework.ScaleAndWaitMachineDeploymentTopology(ctx, framework.ScaleAndWaitMachineDeploymentTopologyInput{
 			ClusterProxy:              input.BootstrapClusterProxy,
 			Cluster:                   clusterResources.Cluster,
-			Replicas:                  excessReplicas,
+			Replicas:                  mdExcessReplicas,
 			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
 		})
 
 		By("Checking enabling autoscaler will scale down the MachineDeployment to correct size")
 		// Enable autoscaler on the MachineDeployment.
-		framework.EnableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.EnableAutoscalerForMachineDeploymentTopologyAndWaitInput{
+		framework.EnableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.EnableAutoscalerForMachineTopologyAndWaitInput{
 			ClusterProxy:                input.BootstrapClusterProxy,
 			Cluster:                     clusterResources.Cluster,
-			NodeGroupMinSize:            nodeGroupMinSize,
-			NodeGroupMaxSize:            nodeGroupMaxSize,
+			NodeGroupMinSize:            mdNodeGroupMinSize,
+			NodeGroupMaxSize:            mdNodeGroupMaxSize,
 			WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
 		})
 
@@ -202,10 +213,85 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput)
 		framework.AssertMachineDeploymentReplicas(ctx, framework.AssertMachineDeploymentReplicasInput{
 			Getter:                   input.BootstrapClusterProxy.GetClient(),
 			MachineDeployment:        clusterResources.MachineDeployments[0],
-			Replicas:                 scaledUpReplicas,
+			Replicas:                 mdScaledUpReplicas,
 			WaitForMachineDeployment: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
 		})
 
+		By("Disabling the autoscaler for MachineDeployments to test MachinePools")
+		framework.DisableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.DisableAutoscalerForMachineTopologyAndWaitInput{
+			ClusterProxy:                  input.BootstrapClusterProxy,
+			Cluster:                       clusterResources.Cluster,
+			WaitForAnnotationsToBeDropped: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
+		})
+
+		By("Deleting the MachineDeployment scale up deployment")
+		framework.DeleteScaleUpDeploymentAndWait(ctx, framework.DeleteScaleUpDeploymentAndWaitInput{
+			ClusterProxy:  workloadClusterProxy,
+			WaitForDelete: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
+		})
+
+		By("Enabling autoscaler for the MachinePool")
+		// Enable autoscaler on the MachinePool.
+		framework.EnableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.EnableAutoscalerForMachineTopologyAndWaitInput{
+			ClusterProxy:                input.BootstrapClusterProxy,
+			Cluster:                     clusterResources.Cluster,
+			NodeGroupMinSize:            mpNodeGroupMinSize,
+			NodeGroupMaxSize:            mpNodeGroupMaxSize,
+			WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
+		})
+
+		By("Creating workload that forces the system to scale up")
+		framework.AddScaleUpDeploymentAndWait(ctx, framework.AddScaleUpDeploymentAndWaitInput{
+			ClusterProxy: workloadClusterProxy,
+			Name:         "mp-scale-up",
+		}, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...)
+
+		By("Checking the MachinePool is scaled up")
+		mpScaledUpReplicas := mpOriginalReplicas + 1
+		framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{
+			Getter:             input.BootstrapClusterProxy.GetClient(),
+			MachinePool:        clusterResources.MachinePools[0],
+			Replicas:           mpScaledUpReplicas,
+			WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
+		})
+
+		By("Disabling the autoscaler")
+		framework.DisableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.DisableAutoscalerForMachineTopologyAndWaitInput{
+			ClusterProxy:                  input.BootstrapClusterProxy,
+			Cluster:                       clusterResources.Cluster,
+			WaitForAnnotationsToBeDropped: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
+		})
+
+		By("Checking we can manually scale up the MachinePool")
+		// Scale up the MachinePool. Since autoscaler is disabled we should be able to do this.
+		mpExcessReplicas := mpScaledUpReplicas + 1
+		framework.ScaleMachinePoolTopologyAndWait(ctx, framework.ScaleMachinePoolTopologyAndWaitInput{
+			ClusterProxy:        input.BootstrapClusterProxy,
+			Cluster:             clusterResources.Cluster,
+			Replicas:            mpExcessReplicas,
+			WaitForMachinePools: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
+		})
+
+		By("Checking enabling autoscaler will scale down the MachinePool to correct size")
+		// Enable autoscaler on the MachinePool.
+		framework.EnableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.EnableAutoscalerForMachineTopologyAndWaitInput{
+			ClusterProxy:                input.BootstrapClusterProxy,
+			Cluster:                     clusterResources.Cluster,
+			NodeGroupMinSize:            mpNodeGroupMinSize,
+			NodeGroupMaxSize:            mpNodeGroupMaxSize,
+			WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
+		})
+
+		By("Checking the MachinePool is scaled down")
+		// Since we scaled up the MachinePool manually and the workload has not changed, the autoscaler
+		// should detect that there are unneeded nodes and scale down the MachinePool.
+		framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{
+			Getter:             input.BootstrapClusterProxy.GetClient(),
+			MachinePool:        clusterResources.MachinePools[0],
+			Replicas:           mpScaledUpReplicas,
+			WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
+		})
+
 		By("PASSED!")
 	})