From 610afe0db6be39ddf5b9a5632665de040ab53c76 Mon Sep 17 00:00:00 2001
From: Jont828
Date: Wed, 15 Nov 2023 15:07:29 -0500
Subject: [PATCH] Remove delete requeue in DockerMachinePool controller and
 ensure DockerMachine ownerRef

---
 .../dockermachinepool_controller.go        | 90 +++++++++++++++----
 .../dockermachinepool_controller_phases.go | 59 ++++++------
 2 files changed, 102 insertions(+), 47 deletions(-)

diff --git a/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go b/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go
index e23f46b44b11..d50281f49b6e 100644
--- a/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go
+++ b/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go
@@ -20,7 +20,6 @@ package controllers
 import (
     "context"
     "fmt"
-    "time"
 
     "github.com/pkg/errors"
     apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -43,6 +42,7 @@ import (
     "sigs.k8s.io/cluster-api/controllers/remote"
     expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
     utilexp "sigs.k8s.io/cluster-api/exp/util"
+    "sigs.k8s.io/cluster-api/internal/util/ssa"
     "sigs.k8s.io/cluster-api/test/infrastructure/container"
     infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1"
     infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1"
@@ -57,8 +57,7 @@ const (
     // dockerMachinePoolLabel is the label used to identify the DockerMachinePool that is responsible for a Docker container.
     dockerMachinePoolLabel = "docker.cluster.x-k8s.io/machine-pool"
 
-    // requeueAfter is how long to wait before checking again to see if the DockerMachines are still provisioning or deleting.
-    requeueAfter = 10 * time.Second
+    dockerMachinePoolControllerName = "dockermachinepool-controller"
 )
 
 // DockerMachinePoolReconciler reconciles a DockerMachinePool object.
@@ -71,6 +70,7 @@ type DockerMachinePoolReconciler struct {
     // WatchFilterValue is the label value used to filter events prior to reconciliation.
     WatchFilterValue string
 
+    ssaCache        ssa.Cache
     recorder        record.EventRecorder
     externalTracker external.ObjectTracker
 }
@@ -140,7 +140,7 @@ func (r *DockerMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Re
 
     // Handle deleted machines
     if !dockerMachinePool.ObjectMeta.DeletionTimestamp.IsZero() {
-        return r.reconcileDelete(ctx, cluster, machinePool, dockerMachinePool)
+        return ctrl.Result{}, r.reconcileDelete(ctx, cluster, machinePool, dockerMachinePool)
     }
 
     // Add finalizer and the InfrastructureMachineKind if they aren't already present, and requeue if either were added.
@@ -194,21 +194,22 @@ func (r *DockerMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr
         return errors.Wrap(err, "failed setting up with a controller manager")
     }
 
-    r.recorder = mgr.GetEventRecorderFor("dockermachinepool-controller")
+    r.recorder = mgr.GetEventRecorderFor(dockerMachinePoolControllerName)
     r.externalTracker = external.ObjectTracker{
         Controller: c,
         Cache:      mgr.GetCache(),
     }
+    r.ssaCache = ssa.NewCache()
 
     return nil
 }
 
-func (r *DockerMachinePoolReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) (ctrl.Result, error) {
+func (r *DockerMachinePoolReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) error {
     log := ctrl.LoggerFrom(ctx)
 
     dockerMachineList, err := getDockerMachines(ctx, r.Client, *cluster, *machinePool, *dockerMachinePool)
     if err != nil {
-        return ctrl.Result{}, err
+        return err
     }
 
     if len(dockerMachineList.Items) > 0 {
@@ -229,10 +230,9 @@ func (r *DockerMachinePoolReconciler) reconcileDelete(ctx context.Context, clust
         }
 
         if len(errs) > 0 {
-            return ctrl.Result{}, kerrors.NewAggregate(errs)
+            return kerrors.NewAggregate(errs)
         }
-
-        return ctrl.Result{RequeueAfter: requeueAfter}, nil
+        return nil
     }
 
     // Once there are no DockerMachines left, ensure there are no Docker containers left behind.
@@ -243,21 +243,21 @@ func (r *DockerMachinePoolReconciler) reconcileDelete(ctx context.Context, clust
     // List Docker containers, i.e. external machines in the cluster.
     externalMachines, err := docker.ListMachinesByCluster(ctx, cluster, labelFilters)
     if err != nil {
-        return ctrl.Result{}, errors.Wrapf(err, "failed to list all machines in the cluster with label \"%s:%s\"", dockerMachinePoolLabel, dockerMachinePool.Name)
+        return errors.Wrapf(err, "failed to list all machines in the cluster with label \"%s:%s\"", dockerMachinePoolLabel, dockerMachinePool.Name)
     }
 
     // Providers should similarly ensure that all infrastructure instances are deleted even if the InfraMachine has not been created yet.
     for _, externalMachine := range externalMachines {
         log.Info("Deleting Docker container", "container", externalMachine.Name())
         if err := externalMachine.Delete(ctx); err != nil {
-            return ctrl.Result{}, errors.Wrapf(err, "failed to delete machine %s", externalMachine.Name())
+            return errors.Wrapf(err, "failed to delete machine %s", externalMachine.Name())
         }
     }
 
     // Once all DockerMachines and Docker containers are deleted, remove the finalizer.
     controllerutil.RemoveFinalizer(dockerMachinePool, infraexpv1.MachinePoolFinalizer)
 
-    return ctrl.Result{}, nil
+    return nil
 }
 
 func (r *DockerMachinePoolReconciler) reconcileNormal(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) (ctrl.Result, error) {
@@ -318,11 +318,7 @@ func (r *DockerMachinePoolReconciler) reconcileNormal(ctx context.Context, clust
         return ctrl.Result{}, nil
     }
 
-    dockerMachinePool.Status.Ready = false
-    conditions.MarkFalse(dockerMachinePool, expv1.ReplicasReadyCondition, expv1.WaitingForReplicasReadyReason, clusterv1.ConditionSeverityInfo, "")
-
-    // if some machine is still provisioning, force reconcile in few seconds to check again infrastructure.
-    return ctrl.Result{RequeueAfter: requeueAfter}, nil
+    return r.updateStatus(ctx, cluster, machinePool, dockerMachinePool, dockerMachineList.Items)
 }
 
 func getDockerMachines(ctx context.Context, c client.Client, cluster clusterv1.Cluster, machinePool expv1.MachinePool, dockerMachinePool infraexpv1.DockerMachinePool) (*infrav1.DockerMachineList, error) {
@@ -380,6 +376,64 @@ func dockerMachineToDockerMachinePool(_ context.Context, o client.Object) []ctrl
     return nil
 }
 
+// updateStatus updates the conditions of the DockerMachinePool based on the current state of its replicas.
+// It compares the number of ready replicas against the desired replica count on the MachinePool.
+func (r *DockerMachinePoolReconciler) updateStatus(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool, dockerMachines []infrav1.DockerMachine) (ctrl.Result, error) {
+    log := ctrl.LoggerFrom(ctx)
+
+    // List the Docker containers. This corresponds to an InfraMachinePool instance for providers.
+    labelFilters := map[string]string{dockerMachinePoolLabel: dockerMachinePool.Name}
+    externalMachines, err := docker.ListMachinesByCluster(ctx, cluster, labelFilters)
+    if err != nil {
+        return ctrl.Result{}, errors.Wrapf(err, "failed to list all external machines in the cluster")
+    }
+
+    externalMachineMap := make(map[string]*docker.Machine)
+    for _, externalMachine := range externalMachines {
+        externalMachineMap[externalMachine.Name()] = externalMachine
+    }
+    // We can reuse getDeletionCandidates to get the list of ready DockerMachines and avoid another API call, even though we aren't deleting them here.
+    _, readyMachines, err := r.getDeletionCandidates(ctx, dockerMachines, externalMachineMap, machinePool, dockerMachinePool)
+    if err != nil {
+        return ctrl.Result{}, err
+    }
+
+    readyReplicaCount := len(readyMachines)
+    desiredReplicas := int(*machinePool.Spec.Replicas)
+
+    switch {
+    // We are scaling up
+    case readyReplicaCount < desiredReplicas:
+        conditions.MarkFalse(dockerMachinePool, clusterv1.ResizedCondition, clusterv1.ScalingUpReason, clusterv1.ConditionSeverityWarning, "Scaling up MachinePool to %d replicas (actual %d)", desiredReplicas, readyReplicaCount)
+    // We are scaling down
+    case readyReplicaCount > desiredReplicas:
+        conditions.MarkFalse(dockerMachinePool, clusterv1.ResizedCondition, clusterv1.ScalingDownReason, clusterv1.ConditionSeverityWarning, "Scaling down MachinePool to %d replicas (actual %d)", desiredReplicas, readyReplicaCount)
+    default:
+        // Make sure the last resize operation is marked as completed.
+        // NOTE: we are checking the number of ready machines so we report resize completed only when the machines
+        // are actually provisioned (vs reporting completed immediately after the last machine object is created). This convention is also used by KCP.
+        if len(dockerMachines) == readyReplicaCount {
+            if conditions.IsFalse(dockerMachinePool, clusterv1.ResizedCondition) {
+                log.Info("All the replicas are ready", "replicas", readyReplicaCount)
+            }
+            conditions.MarkTrue(dockerMachinePool, clusterv1.ResizedCondition)
+        }
+        // This means that there was no error in generating the desired number of machine objects
+        conditions.MarkTrue(dockerMachinePool, clusterv1.MachinesCreatedCondition)
+    }
+
+    getters := make([]conditions.Getter, 0, len(dockerMachines))
+    for i := range dockerMachines {
+        getters = append(getters, &dockerMachines[i])
+    }
+
+    // Aggregate the operational state of all the machines; while aggregating we are adding the
+    // source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
+    conditions.SetAggregate(dockerMachinePool, expv1.ReplicasReadyCondition, getters, conditions.AddSourceRef())
+
+    return ctrl.Result{}, nil
+}
+
 func patchDockerMachinePool(ctx context.Context, patchHelper *patch.Helper, dockerMachinePool *infraexpv1.DockerMachinePool) error {
     conditions.SetSummary(dockerMachinePool,
         conditions.WithConditions(
diff --git a/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller_phases.go b/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller_phases.go
index 1a7201c53ec6..414a44c164e7 100644
--- a/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller_phases.go
+++ b/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller_phases.go
@@ -33,6 +33,7 @@ import (
 
     clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
     expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
+    "sigs.k8s.io/cluster-api/internal/util/ssa"
     infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1"
     infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1"
     "sigs.k8s.io/cluster-api/test/infrastructure/docker/internal/docker"
@@ -142,16 +143,22 @@ func (r *DockerMachinePoolReconciler) reconcileDockerMachines(ctx context.Contex
 
     // Create a DockerMachine for each Docker container so we surface the information to the user. Use the same name as the Docker container for the Docker Machine for ease of lookup.
     // Providers should iterate through their infrastructure instances and ensure that each instance has a corresponding InfraMachine.
     for _, machine := range externalMachines {
-        if _, ok := dockerMachineMap[machine.Name()]; !ok {
+        if existingMachine, ok := dockerMachineMap[machine.Name()]; ok {
+            log.V(2).Info("Patching existing DockerMachine", "name", existingMachine.Name)
+            desiredMachine := computeDesiredDockerMachine(machine.Name(), cluster, machinePool, dockerMachinePool, &existingMachine)
+            if err := ssa.Patch(ctx, r.Client, dockerMachinePoolControllerName, desiredMachine, ssa.WithCachingProxy{Cache: r.ssaCache, Original: &existingMachine}); err != nil {
+                return errors.Wrapf(err, "failed to update DockerMachine %q", klog.KObj(desiredMachine))
+            }
+
+            dockerMachineMap[desiredMachine.Name] = *desiredMachine
+        } else {
             log.V(2).Info("Creating a new DockerMachine for Docker container", "container", machine.Name())
-            dockerMachine, err := r.createDockerMachine(ctx, machine.Name(), cluster, machinePool, dockerMachinePool)
-            if err != nil {
+            desiredMachine := computeDesiredDockerMachine(machine.Name(), cluster, machinePool, dockerMachinePool, nil)
+            if err := ssa.Patch(ctx, r.Client, dockerMachinePoolControllerName, desiredMachine); err != nil {
                 return errors.Wrap(err, "failed to create a new docker machine")
             }
-
-            dockerMachineMap[dockerMachine.Name] = *dockerMachine
-        } else {
-            log.V(4).Info("DockerMachine already exists, nothing to do", "name", machine.Name())
+            dockerMachineMap[desiredMachine.Name] = *desiredMachine
         }
     }
@@ -233,30 +240,15 @@ func (r *DockerMachinePoolReconciler) reconcileDockerMachines(ctx context.Contex
     return nil
 }
 
-// createDockerMachine creates a DockerMachine to represent a Docker container in a DockerMachinePool.
+// computeDesiredDockerMachine returns the desired DockerMachine object representing a Docker container in a DockerMachinePool.
 // These DockerMachines have the clusterv1.ClusterNameLabel and clusterv1.MachinePoolNameLabel to support MachinePool Machines.
-func (r *DockerMachinePoolReconciler) createDockerMachine(ctx context.Context, name string, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) (*infrav1.DockerMachine, error) {
-    log := ctrl.LoggerFrom(ctx)
-
-    labels := map[string]string{
-        clusterv1.ClusterNameLabel:     cluster.Name,
-        clusterv1.MachinePoolNameLabel: format.MustFormatValue(machinePool.Name),
-    }
+func computeDesiredDockerMachine(name string, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool, existingDockerMachine *infrav1.DockerMachine) *infrav1.DockerMachine {
     dockerMachine := &infrav1.DockerMachine{
         ObjectMeta: metav1.ObjectMeta{
             Namespace:   dockerMachinePool.Namespace,
             Name:        name,
-            Labels:      labels,
+            Labels:      make(map[string]string),
             Annotations: make(map[string]string),
-            OwnerReferences: []metav1.OwnerReference{
-                {
-                    APIVersion: dockerMachinePool.APIVersion,
-                    Kind:       dockerMachinePool.Kind,
-                    Name:       dockerMachinePool.Name,
-                    UID:        dockerMachinePool.UID,
-                },
-                // Note: Since the MachinePool controller has not created its owner Machine yet, we want to set the DockerMachinePool as the owner so it's not orphaned.
-            },
         },
         Spec: infrav1.DockerMachineSpec{
             CustomImage:   dockerMachinePool.Spec.Template.CustomImage,
@@ -265,13 +257,22 @@ func (r *DockerMachinePoolReconciler) createDockerMachine(ctx context.Context, n
         },
     }
 
-    log.V(2).Info("Creating DockerMachine", "dockerMachine", dockerMachine.Name)
-
-    if err := r.Client.Create(ctx, dockerMachine); err != nil {
-        return nil, errors.Wrap(err, "failed to create dockerMachine")
+    if existingDockerMachine != nil {
+        dockerMachine.SetUID(existingDockerMachine.UID)
+        dockerMachine.SetOwnerReferences(existingDockerMachine.OwnerReferences)
     }
 
-    return dockerMachine, nil
+    // Note: Since the MachinePool controller has not created its owner Machine yet, we want to set the DockerMachinePool as the owner so it's not orphaned.
+    dockerMachine.SetOwnerReferences(util.EnsureOwnerRef(dockerMachine.OwnerReferences, metav1.OwnerReference{
+        APIVersion: dockerMachinePool.APIVersion,
+        Kind:       dockerMachinePool.Kind,
+        Name:       dockerMachinePool.Name,
+        UID:        dockerMachinePool.UID,
+    }))
+    dockerMachine.Labels[clusterv1.ClusterNameLabel] = cluster.Name
+    dockerMachine.Labels[clusterv1.MachinePoolNameLabel] = format.MustFormatValue(machinePool.Name)
+
+    return dockerMachine
 }
 
 // deleteMachinePoolMachine attempts to delete a DockerMachine and its associated owner Machine if it exists.
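
Reviewer note (not part of the patch): the ownerRef handling in computeDesiredDockerMachine relies on util.EnsureOwnerRef from sigs.k8s.io/cluster-api/util. The standalone sketch below illustrates the behaviour being relied on; the object names, API versions, and UID are made up for illustration. The point is that the DockerMachinePool reference is added exactly once, while owner references already present on an existing DockerMachine (such as the Machine created later by the MachinePool controller) are preserved.

package main

import (
    "fmt"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "sigs.k8s.io/cluster-api/util"
)

func main() {
    // Owner references already present on an existing DockerMachine,
    // e.g. the Machine that the MachinePool controller creates later.
    existing := []metav1.OwnerReference{
        {APIVersion: "cluster.x-k8s.io/v1beta1", Kind: "Machine", Name: "worker-abc12"},
    }

    // Hypothetical DockerMachinePool owner reference for this example.
    poolRef := metav1.OwnerReference{
        APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1",
        Kind:       "DockerMachinePool",
        Name:       "my-pool",
        UID:        "1234",
    }

    // EnsureOwnerRef adds the reference only if an equivalent one (same
    // group, kind, and name) is not already present, so repeated
    // reconciles do not duplicate owner references.
    refs := util.EnsureOwnerRef(existing, poolRef)
    refs = util.EnsureOwnerRef(refs, poolRef) // second call leaves the slice unchanged

    fmt.Println(len(refs)) // 2: the Machine ref is kept, the pool ref is added once.
}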