Skip to content

Commit

Permalink
Add mp to autoscaler e2e test
Browse files Browse the repository at this point in the history
  • Loading branch information
willie-yao committed Mar 26, 2024
1 parent 7eea1d9 commit a3c05f6
Show file tree
Hide file tree
Showing 7 changed files with 442 additions and 54 deletions.
108 changes: 104 additions & 4 deletions exp/internal/webhooks/machinepool.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@ package webhooks
import (
"context"
"fmt"
"strconv"
"strings"

"github.com/pkg/errors"
v1 "k8s.io/api/admission/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/validation/field"
Expand All @@ -36,6 +39,10 @@ import (
)

func (webhook *MachinePool) SetupWebhookWithManager(mgr ctrl.Manager) error {
if webhook.decoder == nil {
webhook.decoder = admission.NewDecoder(mgr.GetScheme())
}

return ctrl.NewWebhookManagedBy(mgr).
For(&expv1.MachinePool{}).
WithDefaulter(webhook).
Expand All @@ -47,27 +54,48 @@ func (webhook *MachinePool) SetupWebhookWithManager(mgr ctrl.Manager) error {
// +kubebuilder:webhook:verbs=create;update,path=/mutate-cluster-x-k8s-io-v1beta1-machinepool,mutating=true,failurePolicy=fail,matchPolicy=Equivalent,groups=cluster.x-k8s.io,resources=machinepools,versions=v1beta1,name=default.machinepool.cluster.x-k8s.io,sideEffects=None,admissionReviewVersions=v1;v1beta1

// MachinePool implements a validation and defaulting webhook for MachinePool.
type MachinePool struct{}
type MachinePool struct {
decoder *admission.Decoder
}

var _ webhook.CustomValidator = &MachinePool{}
var _ webhook.CustomDefaulter = &MachinePool{}

// Default implements webhook.Defaulter so a webhook will be registered for the type.
func (webhook *MachinePool) Default(_ context.Context, obj runtime.Object) error {
func (webhook *MachinePool) Default(ctx context.Context, obj runtime.Object) error {
m, ok := obj.(*expv1.MachinePool)
if !ok {
return apierrors.NewBadRequest(fmt.Sprintf("expected a MachinePool but got a %T", obj))
}

req, err := admission.RequestFromContext(ctx)
if err != nil {
return err
}
dryRun := false
if req.DryRun != nil {
dryRun = *req.DryRun
}
var oldMP *expv1.MachinePool
if req.Operation == v1.Update {
oldMP = &expv1.MachinePool{}
if err := webhook.decoder.DecodeRaw(req.OldObject, oldMP); err != nil {
return errors.Wrapf(err, "failed to decode oldObject to MachinePool")
}
}

if m.Labels == nil {
m.Labels = make(map[string]string)
}
m.Labels[clusterv1.ClusterNameLabel] = m.Spec.ClusterName

if m.Spec.Replicas == nil {
m.Spec.Replicas = ptr.To[int32](1)
replicas, err := calculateMachinePoolReplicas(ctx, oldMP, m, dryRun)
if err != nil {
return err
}

m.Spec.Replicas = ptr.To[int32](replicas)

if m.Spec.MinReadySeconds == nil {
m.Spec.MinReadySeconds = ptr.To[int32](0)
}
Expand Down Expand Up @@ -187,3 +215,75 @@ func (webhook *MachinePool) validate(oldObj, newObj *expv1.MachinePool) error {
}
return apierrors.NewInvalid(clusterv1.GroupVersion.WithKind("MachinePool").GroupKind(), newObj.Name, allErrs)
}

// calculateMachinePoolReplicas returns the value that should be used for
// newMP.Spec.Replicas during defaulting.
//
// The value is selected as follows:
//  1. If newMP already has replicas set, that value is kept.
//  2. If both the autoscaler min size and max size annotations are set on newMP:
//     - oldMP is nil (new MachinePool) or oldMP has no replicas set => min size
//     - oldMP replicas are below min size => min size
//     - oldMP replicas are above max size => max size
//     - otherwise => oldMP replicas
//  3. Otherwise the value is 1.
//
// An error is returned if both annotations are present and either of them cannot
// be parsed as a base-10 32-bit integer. When dryRun is true no log messages are
// emitted.
func calculateMachinePoolReplicas(ctx context.Context, oldMP *expv1.MachinePool, newMP *expv1.MachinePool, dryRun bool) (int32, error) {
	// If replicas is already set => Keep the current value.
	if newMP.Spec.Replicas != nil {
		return *newMP.Spec.Replicas, nil
	}

	log := ctrl.LoggerFrom(ctx)

	// logDefaulted records how the default was chosen. Nothing is logged for dry-run requests.
	logDefaulted := func(format string, args ...any) {
		if dryRun {
			return
		}
		log.V(2).Info(fmt.Sprintf(format, args...))
	}

	// The autoscaler annotations are only used if both of them are set.
	minSizeString, hasMinSizeAnnotation := newMP.Annotations[clusterv1.AutoscalerMinSizeAnnotation]
	maxSizeString, hasMaxSizeAnnotation := newMP.Annotations[clusterv1.AutoscalerMaxSizeAnnotation]
	if !hasMinSizeAnnotation || !hasMaxSizeAnnotation {
		// If neither the replicas field nor both autoscaler annotations are set => Default to 1.
		logDefaulted("Replica field has been defaulted to 1")
		return 1, nil
	}

	minSize, err := strconv.ParseInt(minSizeString, 10, 32)
	if err != nil {
		return 0, errors.Wrapf(err, "failed to calculate MachinePool replicas value: could not parse the value of the %q annotation", clusterv1.AutoscalerMinSizeAnnotation)
	}
	maxSize, err := strconv.ParseInt(maxSizeString, 10, 32)
	if err != nil {
		return 0, errors.Wrapf(err, "failed to calculate MachinePool replicas value: could not parse the value of the %q annotation", clusterv1.AutoscalerMaxSizeAnnotation)
	}

	// ParseInt was called with bitSize 32, so these conversions cannot lose information.
	minReplicas, maxReplicas := int32(minSize), int32(maxSize)

	switch {
	// If it's a new MachinePool => Use the min size.
	// Note: This will result in a scale up to get into the range where autoscaler takes over.
	case oldMP == nil:
		logDefaulted("Replica field has been defaulted to %d based on the %s annotation (MP is a new MP)", minReplicas, clusterv1.AutoscalerMinSizeAnnotation)
		return minReplicas, nil

	// In all remaining cases we are handing over the control for the replicas field
	// of an existing MachinePool to the autoscaler.

	// If the old MachinePool doesn't have replicas set => Use the min size.
	// Note: As defaulting always sets the replica field, this case should not be possible.
	// We only have this handling to be 100% safe against panics.
	case oldMP.Spec.Replicas == nil:
		logDefaulted("Replica field has been defaulted to %d based on the %s annotation (old MP didn't have replicas set)", minReplicas, clusterv1.AutoscalerMinSizeAnnotation)
		return minReplicas, nil

	// If the old MachinePool replicas are lower than min size => Use the min size.
	// Note: This will result in a scale up to get into the range where autoscaler takes over.
	case *oldMP.Spec.Replicas < minReplicas:
		logDefaulted("Replica field has been defaulted to %d based on the %s annotation (old MP had replicas below min size)", minReplicas, clusterv1.AutoscalerMinSizeAnnotation)
		return minReplicas, nil

	// If the old MachinePool replicas are higher than max size => Use the max size.
	// Note: This will result in a scale down to get into the range where autoscaler takes over.
	case *oldMP.Spec.Replicas > maxReplicas:
		logDefaulted("Replica field has been defaulted to %d based on the %s annotation (old MP had replicas above max size)", maxReplicas, clusterv1.AutoscalerMaxSizeAnnotation)
		return maxReplicas, nil

	// If the old MachinePool replicas are between min and max size => Keep the current value.
	default:
		logDefaulted("Replica field has been defaulted to %d based on replicas of the old MachinePool (old MP had replicas within min size / max size range)", *oldMP.Spec.Replicas)
		return *oldMP.Spec.Replicas, nil
	}
}
3 changes: 2 additions & 1 deletion exp/internal/webhooks/machinepool_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,15 @@ import (
utilfeature "k8s.io/component-base/featuregate/testing"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/feature"
"sigs.k8s.io/cluster-api/internal/webhooks/util"
)

var ctx = ctrl.SetupSignalHandler()
var ctx = admission.NewContextWithRequest(ctrl.SetupSignalHandler(), admission.Request{})

func TestMachinePoolDefault(t *testing.T) {
// NOTE: MachinePool feature flag is disabled by default, thus preventing to create or update MachinePool.
Expand Down
140 changes: 113 additions & 27 deletions test/e2e/autoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,9 @@ type AutoscalerSpecInput struct {
// InfrastructureMachineTemplateKind should be the plural form of the InfraMachineTemplate kind.
// It should be specified in lower case.
// Example: dockermachinetemplates.
InfrastructureMachineTemplateKind string
AutoscalerVersion string
InfrastructureMachineTemplateKind string
InfrastructureMachinePoolTemplateKind string
AutoscalerVersion string

// Allows to inject a function to be run after test namespace is created.
// If not specified, this is a no-op.
Expand All @@ -71,11 +72,13 @@ type AutoscalerSpecInput struct {
// being deployed in the workload cluster.
func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput) {
var (
specName = "autoscaler"
input AutoscalerSpecInput
namespace *corev1.Namespace
cancelWatches context.CancelFunc
clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult
specName = "autoscaler"
mpNodeGroupMinSize = "1"
mpNodeGroupMaxSize = "5"
input AutoscalerSpecInput
namespace *corev1.Namespace
cancelWatches context.CancelFunc
clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult
)

BeforeEach(func() {
Expand Down Expand Up @@ -127,32 +130,40 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput)
WaitForClusterIntervals: input.E2EConfig.GetIntervals(specName, "wait-cluster"),
WaitForControlPlaneIntervals: input.E2EConfig.GetIntervals(specName, "wait-control-plane"),
WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
WaitForMachinePools: input.E2EConfig.GetIntervals(specName, "wait-machine-pool-nodes"),
}, clusterResources)

Expect(clusterResources.Cluster.Spec.Topology).NotTo(BeNil(), "Autoscaler test expected a Classy Cluster")
// Ensure the MachineDeploymentTopology has the autoscaler annotations.
mdTopology := clusterResources.Cluster.Spec.Topology.Workers.MachineDeployments[0]
Expect(mdTopology.Metadata.Annotations).NotTo(BeNil(), "MachineDeployment is expected to have autoscaler annotations")
nodeGroupMinSize, ok := mdTopology.Metadata.Annotations[clusterv1.AutoscalerMinSizeAnnotation]
mdNodeGroupMinSize, ok := mdTopology.Metadata.Annotations[clusterv1.AutoscalerMinSizeAnnotation]
Expect(ok).To(BeTrue(), "MachineDeploymentTopology %s does not have the %q autoscaler annotation", mdTopology.Name, clusterv1.AutoscalerMinSizeAnnotation)
nodeGroupMaxSize, ok := mdTopology.Metadata.Annotations[clusterv1.AutoscalerMaxSizeAnnotation]
mdNodeGroupMaxSize, ok := mdTopology.Metadata.Annotations[clusterv1.AutoscalerMaxSizeAnnotation]
Expect(ok).To(BeTrue(), "MachineDeploymentTopology %s does not have the %q autoscaler annotation", mdTopology.Name, clusterv1.AutoscalerMaxSizeAnnotation)

// Ensure the MachinePoolTopology does NOT have the autoscaler annotations so we can test MachineDeployments first.
mpTopology := clusterResources.Cluster.Spec.Topology.Workers.MachinePools[0]
Expect(mpTopology.Metadata.Annotations).To(BeNil(), "MachinePool is expected to have autoscaler annotations")

// Get a ClusterProxy so we can interact with the workload cluster
workloadClusterProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, clusterResources.Cluster.Namespace, clusterResources.Cluster.Name)
originalReplicas := *clusterResources.MachineDeployments[0].Spec.Replicas
Expect(strconv.Itoa(int(originalReplicas))).To(Equal(nodeGroupMinSize), "MachineDeployment should have replicas as defined in %s", clusterv1.AutoscalerMinSizeAnnotation)
mdOriginalReplicas := *clusterResources.MachineDeployments[0].Spec.Replicas
Expect(strconv.Itoa(int(mdOriginalReplicas))).To(Equal(mdNodeGroupMinSize), "MachineDeployment should have replicas as defined in %s", clusterv1.AutoscalerMinSizeAnnotation)
mpOriginalReplicas := *clusterResources.MachinePools[0].Spec.Replicas
Expect(strconv.Itoa(int(mpOriginalReplicas))).To(Equal(mpNodeGroupMinSize), "MachinePool should have replicas as defined in %s", clusterv1.AutoscalerMinSizeAnnotation)

By("Installing the autoscaler on the workload cluster")
autoscalerWorkloadYAMLPath := input.E2EConfig.GetVariable(AutoscalerWorkloadYAMLPath)
framework.ApplyAutoscalerToWorkloadCluster(ctx, framework.ApplyAutoscalerToWorkloadClusterInput{
ArtifactFolder: input.ArtifactFolder,
InfrastructureMachineTemplateKind: input.InfrastructureMachineTemplateKind,
WorkloadYamlPath: autoscalerWorkloadYAMLPath,
ManagementClusterProxy: input.BootstrapClusterProxy,
WorkloadClusterProxy: workloadClusterProxy,
Cluster: clusterResources.Cluster,
AutoscalerVersion: input.AutoscalerVersion,
ArtifactFolder: input.ArtifactFolder,
InfrastructureMachineTemplateKind: input.InfrastructureMachineTemplateKind,
InfrastructureMachinePoolTemplateKind: input.InfrastructureMachinePoolTemplateKind,
WorkloadYamlPath: autoscalerWorkloadYAMLPath,
ManagementClusterProxy: input.BootstrapClusterProxy,
WorkloadClusterProxy: workloadClusterProxy,
Cluster: clusterResources.Cluster,
AutoscalerVersion: input.AutoscalerVersion,
}, input.E2EConfig.GetIntervals(specName, "wait-controllers")...)

By("Creating workload that forces the system to scale up")
Expand All @@ -161,38 +172,38 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput)
}, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...)

By("Checking the MachineDeployment is scaled up")
scaledUpReplicas := originalReplicas + 1
mdScaledUpReplicas := mdOriginalReplicas + 1
framework.AssertMachineDeploymentReplicas(ctx, framework.AssertMachineDeploymentReplicasInput{
Getter: input.BootstrapClusterProxy.GetClient(),
MachineDeployment: clusterResources.MachineDeployments[0],
Replicas: scaledUpReplicas,
Replicas: mdScaledUpReplicas,
WaitForMachineDeployment: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
})

By("Disabling the autoscaler")
framework.DisableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.DisableAutoscalerForMachineDeploymentTopologyAndWaitInput{
framework.DisableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.DisableAutoscalerForMachineTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
WaitForAnnotationsToBeDropped: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
})

By("Checking we can manually scale up the MachineDeployment")
// Scale up the MachineDeployment. Since autoscaler is disabled we should be able to do this.
excessReplicas := scaledUpReplicas + 1
mdExcessReplicas := mdScaledUpReplicas + 1
framework.ScaleAndWaitMachineDeploymentTopology(ctx, framework.ScaleAndWaitMachineDeploymentTopologyInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
Replicas: excessReplicas,
Replicas: mdExcessReplicas,
WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
})

By("Checking enabling autoscaler will scale down the MachineDeployment to correct size")
// Enable autoscaler on the MachineDeployment.
framework.EnableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.EnableAutoscalerForMachineDeploymentTopologyAndWaitInput{
framework.EnableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.EnableAutoscalerForMachineTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
NodeGroupMinSize: nodeGroupMinSize,
NodeGroupMaxSize: nodeGroupMaxSize,
NodeGroupMinSize: mdNodeGroupMinSize,
NodeGroupMaxSize: mdNodeGroupMaxSize,
WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
})

Expand All @@ -202,10 +213,85 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput)
framework.AssertMachineDeploymentReplicas(ctx, framework.AssertMachineDeploymentReplicasInput{
Getter: input.BootstrapClusterProxy.GetClient(),
MachineDeployment: clusterResources.MachineDeployments[0],
Replicas: scaledUpReplicas,
Replicas: mdScaledUpReplicas,
WaitForMachineDeployment: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
})

By("Disabling the autoscaler for MachineDeployments to test MachinePools")
framework.DisableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.DisableAutoscalerForMachineTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
WaitForAnnotationsToBeDropped: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
})

By("Deleting the MachineDeployment scale up deployment")
framework.DeleteScaleUpDeploymentAndWait(ctx, framework.DeleteScaleUpDeploymentAndWaitInput{
ClusterProxy: workloadClusterProxy,
WaitForDelete: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
})

By("Enabling autoscaler for the MachinePool")
// Enable autoscaler on the MachinePool.
framework.EnableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.EnableAutoscalerForMachineTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
NodeGroupMinSize: mpNodeGroupMinSize,
NodeGroupMaxSize: mpNodeGroupMaxSize,
WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
})

By("Creating workload that forces the system to scale up")
framework.AddScaleUpDeploymentAndWait(ctx, framework.AddScaleUpDeploymentAndWaitInput{
ClusterProxy: workloadClusterProxy,
Name: "mp-scale-up",
}, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...)

By("Checking the MachinePool is scaled up")
mpScaledUpReplicas := mpOriginalReplicas + 1
framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{
Getter: input.BootstrapClusterProxy.GetClient(),
MachinePool: clusterResources.MachinePools[0],
Replicas: mpScaledUpReplicas,
WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
})

By("Disabling the autoscaler")
framework.DisableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.DisableAutoscalerForMachineTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
WaitForAnnotationsToBeDropped: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
})

By("Checking we can manually scale up the MachinePool")
// Scale up the MachinePool. Since autoscaler is disabled we should be able to do this.
mpExcessReplicas := mpScaledUpReplicas + 1
framework.ScaleMachinePoolTopologyAndWait(ctx, framework.ScaleMachinePoolTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
Replicas: mpExcessReplicas,
WaitForMachinePools: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
})

By("Checking enabling autoscaler will scale down the MachinePool to correct size")
// Enable autoscaler on the MachinePool.
framework.EnableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.EnableAutoscalerForMachineTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
NodeGroupMinSize: mpNodeGroupMinSize,
NodeGroupMaxSize: mpNodeGroupMaxSize,
WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
})

By("Checking the MachinePool is scaled down")
// Since we scaled up the MachinePool manually and the workload has not changed auto scaler
// should detect that there are unneeded nodes and scale down the MachinePool.
framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{
Getter: input.BootstrapClusterProxy.GetClient(),
MachinePool: clusterResources.MachinePools[0],
Replicas: mpScaledUpReplicas,
WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
})

By("PASSED!")
})

Expand Down
Loading

0 comments on commit a3c05f6

Please sign in to comment.