🌱 Remove requeues in DockerMachinePool #9725

Merged · 1 commit · Nov 30, 2023
@@ -20,7 +20,6 @@ package controllers
import (
"context"
"fmt"
"time"

"github.com/pkg/errors"
apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -43,6 +42,7 @@ import (
"sigs.k8s.io/cluster-api/controllers/remote"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
utilexp "sigs.k8s.io/cluster-api/exp/util"
"sigs.k8s.io/cluster-api/internal/util/ssa"
"sigs.k8s.io/cluster-api/test/infrastructure/container"
infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1"
infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1"
@@ -57,8 +57,7 @@ const (
// dockerMachinePoolLabel is the label used to identify the DockerMachinePool that is responsible for a Docker container.
dockerMachinePoolLabel = "docker.cluster.x-k8s.io/machine-pool"

// requeueAfter is how long to wait before checking again to see if the DockerMachines are still provisioning or deleting.
requeueAfter = 10 * time.Second
dockerMachinePoolControllerName = "dockermachinepool-controller"
)
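With the fixed `requeueAfter` gone, the controller depends on watch events to re-trigger reconciliation while DockerMachines are still provisioning or deleting. The snippet below is a minimal sketch of that mechanism, modeled on the `dockerMachineToDockerMachinePool` hunk further down in this diff (the `Sketch` suffix marks it as an illustration, not the PR's exact code):

```go
package controllers

import (
	"context"

	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"

	infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1"
)

// Map a DockerMachine event back to its owning DockerMachinePool so that
// provisioning/deletion progress triggers a reconcile immediately, with no
// timer-based requeue.
func dockerMachineToDockerMachinePoolSketch(_ context.Context, o client.Object) []ctrl.Request {
	dockerMachine, ok := o.(*infrav1.DockerMachine)
	if !ok {
		return nil
	}
	// dockerMachinePoolLabel ("docker.cluster.x-k8s.io/machine-pool") identifies the owning pool.
	if name, ok := dockerMachine.Labels[dockerMachinePoolLabel]; ok {
		return []ctrl.Request{{NamespacedName: client.ObjectKey{Namespace: dockerMachine.Namespace, Name: name}}}
	}
	return nil
}
```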

// DockerMachinePoolReconciler reconciles a DockerMachinePool object.
@@ -71,6 +70,7 @@ type DockerMachinePoolReconciler struct {
// WatchFilterValue is the label value used to filter events prior to reconciliation.
WatchFilterValue string

ssaCache ssa.Cache
recorder record.EventRecorder
externalTracker external.ObjectTracker
}
@@ -140,7 +140,7 @@ func (r *DockerMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Re

// Handle deleted machines
if !dockerMachinePool.ObjectMeta.DeletionTimestamp.IsZero() {
return r.reconcileDelete(ctx, cluster, machinePool, dockerMachinePool)
return ctrl.Result{}, r.reconcileDelete(ctx, cluster, machinePool, dockerMachinePool)
}

// Add the finalizer and the InfrastructureMachineKind if they aren't already present, and requeue if either was added.
@@ -194,21 +194,22 @@ func (r *DockerMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr
return errors.Wrap(err, "failed setting up with a controller manager")
}

r.recorder = mgr.GetEventRecorderFor("dockermachinepool-controller")
r.recorder = mgr.GetEventRecorderFor(dockerMachinePoolControllerName)
r.externalTracker = external.ObjectTracker{
Controller: c,
Cache: mgr.GetCache(),
}
r.ssaCache = ssa.NewCache()

return nil
}
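The builder options that wire up those watches sit in the collapsed region above this hunk. A hedged sketch of the kind of wiring the controller relies on once fixed requeues are gone (assuming controller-runtime's builder API and the real `dockerMachineToDockerMachinePool` mapper from this file; simplified, not the PR's exact setup):

```go
package controllers

import (
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/handler"

	infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1"
	infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1"
)

// buildControllerSketch is a hypothetical, simplified version of the watch
// wiring in SetupWithManager: DockerMachine events are mapped back to the
// owning DockerMachinePool, replacing timer-based requeues.
func buildControllerSketch(mgr ctrl.Manager, r *DockerMachinePoolReconciler) error {
	return ctrl.NewControllerManagedBy(mgr).
		For(&infraexpv1.DockerMachinePool{}).
		// Re-enqueue the owning DockerMachinePool whenever one of its DockerMachines changes.
		Watches(
			&infrav1.DockerMachine{},
			handler.EnqueueRequestsFromMapFunc(dockerMachineToDockerMachinePool),
		).
		Complete(r)
}
```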

func (r *DockerMachinePoolReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) (ctrl.Result, error) {
func (r *DockerMachinePoolReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) error {
log := ctrl.LoggerFrom(ctx)

dockerMachineList, err := getDockerMachines(ctx, r.Client, *cluster, *machinePool, *dockerMachinePool)
if err != nil {
return ctrl.Result{}, err
return err
}

if len(dockerMachineList.Items) > 0 {
@@ -229,10 +230,9 @@ func (r *DockerMachinePoolReconciler) reconcileDelete(ctx context.Context, clust
}

if len(errs) > 0 {
return ctrl.Result{}, kerrors.NewAggregate(errs)
return kerrors.NewAggregate(errs)
}

return ctrl.Result{RequeueAfter: requeueAfter}, nil
return nil
}

// Once there are no DockerMachines left, ensure there are no Docker containers left behind.
@@ -243,21 +243,21 @@ func (r *DockerMachinePoolReconciler) reconcileDelete(ctx context.Context, clust
// List Docker containers, i.e. external machines in the cluster.
externalMachines, err := docker.ListMachinesByCluster(ctx, cluster, labelFilters)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to list all machines in the cluster with label \"%s:%s\"", dockerMachinePoolLabel, dockerMachinePool.Name)
return errors.Wrapf(err, "failed to list all machines in the cluster with label \"%s:%s\"", dockerMachinePoolLabel, dockerMachinePool.Name)
}

// Providers should similarly ensure that all infrastructure instances are deleted even if the InfraMachine has not been created yet.
for _, externalMachine := range externalMachines {
log.Info("Deleting Docker container", "container", externalMachine.Name())
if err := externalMachine.Delete(ctx); err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to delete machine %s", externalMachine.Name())
return errors.Wrapf(err, "failed to delete machine %s", externalMachine.Name())
}
}

// Once all DockerMachines and Docker containers are deleted, remove the finalizer.
controllerutil.RemoveFinalizer(dockerMachinePool, infraexpv1.MachinePoolFinalizer)

return ctrl.Result{}, nil
return nil
}
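`reconcileDelete` now returns a plain error, which `Reconcile` wraps in an empty `ctrl.Result` (see the hunk near the top). This works because deletion is gated on the finalizer rather than on requeues: the object only disappears once the finalizer is removed, and remaining DockerMachine deletions re-trigger reconciliation through watches. A minimal sketch of that gate, using controller-runtime's `controllerutil` (a hypothetical helper, not code from this PR):

```go
package controllers

import (
	infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

// ensureFinalizerSketch shows why no requeue is needed after a successful
// delete pass: the API server only removes the DockerMachinePool once cleanup
// has finished and the finalizer is released.
func ensureFinalizerSketch(dockerMachinePool *infraexpv1.DockerMachinePool, cleanupDone bool) {
	if !cleanupDone {
		// Keep (or add) the finalizer while DockerMachines or containers still exist.
		controllerutil.AddFinalizer(dockerMachinePool, infraexpv1.MachinePoolFinalizer)
		return
	}
	// All resources are gone: release the finalizer so deletion can complete.
	controllerutil.RemoveFinalizer(dockerMachinePool, infraexpv1.MachinePoolFinalizer)
}
```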

func (r *DockerMachinePoolReconciler) reconcileNormal(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) (ctrl.Result, error) {
@@ -318,11 +318,7 @@ func (r *DockerMachinePoolReconciler) reconcileNormal(ctx context.Context, clust
return ctrl.Result{}, nil
}

dockerMachinePool.Status.Ready = false
conditions.MarkFalse(dockerMachinePool, expv1.ReplicasReadyCondition, expv1.WaitingForReplicasReadyReason, clusterv1.ConditionSeverityInfo, "")

// if some machine is still provisioning, force reconcile in few seconds to check again infrastructure.
return ctrl.Result{RequeueAfter: requeueAfter}, nil
return r.updateStatus(ctx, cluster, machinePool, dockerMachinePool, dockerMachineList.Items)
}

func getDockerMachines(ctx context.Context, c client.Client, cluster clusterv1.Cluster, machinePool expv1.MachinePool, dockerMachinePool infraexpv1.DockerMachinePool) (*infrav1.DockerMachineList, error) {
@@ -380,6 +376,64 @@ func dockerMachineToDockerMachinePool(_ context.Context, o client.Object) []ctrl
return nil
}

// updateStatus updates the Status field for the DockerMachinePool object.
// It checks the current state of the replicas and updates the DockerMachinePool conditions accordingly.
func (r *DockerMachinePoolReconciler) updateStatus(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool, dockerMachines []infrav1.DockerMachine) (ctrl.Result, error) {
log := ctrl.LoggerFrom(ctx)

// List the Docker containers. This corresponds to an InfraMachinePool instance for providers.
labelFilters := map[string]string{dockerMachinePoolLabel: dockerMachinePool.Name}
externalMachines, err := docker.ListMachinesByCluster(ctx, cluster, labelFilters)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to list all external machines in the cluster")
}

externalMachineMap := make(map[string]*docker.Machine)
for _, externalMachine := range externalMachines {
externalMachineMap[externalMachine.Name()] = externalMachine
}
// We can reuse getDeletionCandidates to get the list of ready DockerMachines and avoid another API call, even though we aren't deleting them here.
_, readyMachines, err := r.getDeletionCandidates(ctx, dockerMachines, externalMachineMap, machinePool, dockerMachinePool)
if err != nil {
return ctrl.Result{}, err
}

readyReplicaCount := len(readyMachines)
desiredReplicas := int(*machinePool.Spec.Replicas)

switch {
// We are scaling up
case readyReplicaCount < desiredReplicas:
conditions.MarkFalse(dockerMachinePool, clusterv1.ResizedCondition, clusterv1.ScalingUpReason, clusterv1.ConditionSeverityWarning, "Scaling up MachinePool to %d replicas (actual %d)", desiredReplicas, readyReplicaCount)
// We are scaling down
case readyReplicaCount > desiredReplicas:
conditions.MarkFalse(dockerMachinePool, clusterv1.ResizedCondition, clusterv1.ScalingDownReason, clusterv1.ConditionSeverityWarning, "Scaling down MachinePool to %d replicas (actual %d)", desiredReplicas, readyReplicaCount)
default:
// Make sure last resize operation is marked as completed.
// NOTE: we are checking the number of machines ready so we report resize completed only when the machines
// are actually provisioned (vs reporting completed immediately after the last machine object is created). This convention is also used by KCP.
if len(dockerMachines) == readyReplicaCount {
if conditions.IsFalse(dockerMachinePool, clusterv1.ResizedCondition) {
log.Info("All the replicas are ready", "replicas", readyReplicaCount)
}
conditions.MarkTrue(dockerMachinePool, clusterv1.ResizedCondition)
}
// This means that there was no error in generating the desired number of machine objects
conditions.MarkTrue(dockerMachinePool, clusterv1.MachinesCreatedCondition)
}

getters := make([]conditions.Getter, 0, len(dockerMachines))
for i := range dockerMachines {
getters = append(getters, &dockerMachines[i])
}

// Aggregate the operational state of all the machines; while aggregating we are adding the
// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
conditions.SetAggregate(dockerMachinePool, expv1.ReplicasReadyCondition, getters, conditions.AddSourceRef())

return ctrl.Result{}, nil
}
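Downstream consumers can then read the aggregated condition instead of polling. For example (a hypothetical consumer-side check, assuming cluster-api's `util/conditions` package):

```go
package controllers

import (
	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
	infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/conditions"
)

// replicasReadySketch is a hypothetical consumer of the aggregate condition
// set above: it reports true only once every source DockerMachine's Ready
// condition has been aggregated as true.
func replicasReadySketch(dockerMachinePool *infraexpv1.DockerMachinePool) bool {
	return conditions.IsTrue(dockerMachinePool, expv1.ReplicasReadyCondition)
}
```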

func patchDockerMachinePool(ctx context.Context, patchHelper *patch.Helper, dockerMachinePool *infraexpv1.DockerMachinePool) error {
conditions.SetSummary(dockerMachinePool,
conditions.WithConditions(
@@ -33,6 +33,7 @@ import (

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/internal/util/ssa"
infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1"
infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/test/infrastructure/docker/internal/docker"
@@ -142,16 +143,22 @@ func (r *DockerMachinePoolReconciler) reconcileDockerMachines(ctx context.Contex
// Create a DockerMachine for each Docker container so we surface the information to the user. Use the same name as the Docker container for the Docker Machine for ease of lookup.
// Providers should iterate through their infrastructure instances and ensure that each instance has a corresponding InfraMachine.
for _, machine := range externalMachines {
if _, ok := dockerMachineMap[machine.Name()]; !ok {
if existingMachine, ok := dockerMachineMap[machine.Name()]; ok {
log.V(2).Info("Patching existing DockerMachine", "name", existingMachine.Name)
desiredMachine := computeDesiredDockerMachine(machine.Name(), cluster, machinePool, dockerMachinePool, &existingMachine)
if err := ssa.Patch(ctx, r.Client, dockerMachinePoolControllerName, desiredMachine, ssa.WithCachingProxy{Cache: r.ssaCache, Original: &existingMachine}); err != nil {
return errors.Wrapf(err, "failed to update DockerMachine %q", klog.KObj(desiredMachine))
}

dockerMachineMap[desiredMachine.Name] = *desiredMachine
} else {
log.V(2).Info("Creating a new DockerMachine for Docker container", "container", machine.Name())
dockerMachine, err := r.createDockerMachine(ctx, machine.Name(), cluster, machinePool, dockerMachinePool)
if err != nil {
desiredMachine := computeDesiredDockerMachine(machine.Name(), cluster, machinePool, dockerMachinePool, nil)
if err := ssa.Patch(ctx, r.Client, dockerMachinePoolControllerName, desiredMachine); err != nil {
return errors.Wrap(err, "failed to create a new docker machine")
}

dockerMachineMap[dockerMachine.Name] = *dockerMachine
} else {
log.V(4).Info("DockerMachine already exists, nothing to do", "name", machine.Name())
dockerMachineMap[desiredMachine.Name] = *desiredMachine
}
}

@@ -233,30 +240,15 @@ func (r *DockerMachinePoolReconciler) reconcileDockerMachines(ctx context.Contex
return nil
}

// createDockerMachine creates a DockerMachine to represent a Docker container in a DockerMachinePool.
// computeDesiredDockerMachine creates a DockerMachine to represent a Docker container in a DockerMachinePool.
// These DockerMachines have the clusterv1.ClusterNameLabel and clusterv1.MachinePoolNameLabel to support MachinePool Machines.
func (r *DockerMachinePoolReconciler) createDockerMachine(ctx context.Context, name string, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) (*infrav1.DockerMachine, error) {
log := ctrl.LoggerFrom(ctx)

labels := map[string]string{
clusterv1.ClusterNameLabel: cluster.Name,
clusterv1.MachinePoolNameLabel: format.MustFormatValue(machinePool.Name),
}
func computeDesiredDockerMachine(name string, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool, existingDockerMachine *infrav1.DockerMachine) *infrav1.DockerMachine {
dockerMachine := &infrav1.DockerMachine{
ObjectMeta: metav1.ObjectMeta{
Namespace: dockerMachinePool.Namespace,
Name: name,
Labels: labels,
Labels: make(map[string]string),
Annotations: make(map[string]string),
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: dockerMachinePool.APIVersion,
Kind: dockerMachinePool.Kind,
Name: dockerMachinePool.Name,
UID: dockerMachinePool.UID,
},
// Note: Since the MachinePool controller has not created its owner Machine yet, we want to set the DockerMachinePool as the owner so it's not orphaned.
},
},
Spec: infrav1.DockerMachineSpec{
CustomImage: dockerMachinePool.Spec.Template.CustomImage,
@@ -265,13 +257,22 @@ func (r *DockerMachinePoolReconciler) createDockerMachine(ctx context.Context, n
},
}

log.V(2).Info("Creating DockerMachine", "dockerMachine", dockerMachine.Name)

if err := r.Client.Create(ctx, dockerMachine); err != nil {
return nil, errors.Wrap(err, "failed to create dockerMachine")
if existingDockerMachine != nil {
dockerMachine.SetUID(existingDockerMachine.UID)
dockerMachine.SetOwnerReferences(existingDockerMachine.OwnerReferences)
}

return dockerMachine, nil
// Note: Since the MachinePool controller has not created its owner Machine yet, we want to set the DockerMachinePool as the owner so it's not orphaned.
dockerMachine.SetOwnerReferences(util.EnsureOwnerRef(dockerMachine.OwnerReferences, metav1.OwnerReference{
APIVersion: dockerMachinePool.APIVersion,
Kind: dockerMachinePool.Kind,
Name: dockerMachinePool.Name,
UID: dockerMachinePool.UID,
}))
dockerMachine.Labels[clusterv1.ClusterNameLabel] = cluster.Name
dockerMachine.Labels[clusterv1.MachinePoolNameLabel] = format.MustFormatValue(machinePool.Name)

return dockerMachine
}
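Taken together with the loop in reconcileDockerMachines, the create and update paths collapse into one compute-then-apply flow. A hedged sketch of that flow, condensed from the hunks above (the helper name is hypothetical; the `ssa.Patch` call and `WithCachingProxy` option are taken from this diff):

```go
package controllers

import (
	"context"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
	"sigs.k8s.io/cluster-api/internal/util/ssa"
	infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1"
	infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1"
)

// applyDockerMachineSketch condenses reconcileDockerMachines: one server-side
// apply path covers both create (existing == nil) and update (existing != nil).
func (r *DockerMachinePoolReconciler) applyDockerMachineSketch(ctx context.Context, name string, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool, existing *infrav1.DockerMachine) error {
	desired := computeDesiredDockerMachine(name, cluster, machinePool, dockerMachinePool, existing)
	if existing != nil {
		// The caching proxy skips the apply request when the desired object is unchanged.
		return ssa.Patch(ctx, r.Client, dockerMachinePoolControllerName, desired,
			ssa.WithCachingProxy{Cache: r.ssaCache, Original: existing})
	}
	return ssa.Patch(ctx, r.Client, dockerMachinePoolControllerName, desired)
}
```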

// deleteMachinePoolMachine attempts to delete a DockerMachine and its associated owner Machine if it exists.